diff options
Diffstat (limited to 'docs/generator/checklinks.sh')
-rwxr-xr-x | docs/generator/checklinks.sh | 394 |
1 files changed, 394 insertions, 0 deletions
#!/bin/bash
# Recovered from the "new file" diff of docs/generator/checklinks.sh (mode 100755).

# Doc link checker
# Validates and tries to fix all links that will cause issues either in the
# repo, or in the html site.
#
# Globals:
#   GENERATOR_DIR  directory of the doc generator; relative-link fixes are
#                  applied to the copies under $GENERATOR_DIR/src
#   VERBOSE, EXECUTE, TESTURLS, CHK*  option flags set by the getopts loop
#   EXITCODE       set to 1 when a link error is reported; used as the
#                  script's exit status

GENERATOR_DIR="docs/generator"

# Print the message in $1, but only when -v (VERBOSE=1) was given.
dbg () {
  if [ "$VERBOSE" -eq 1 ] ; then printf "%s\\n" "${1}" ; fi
}

# Print the usage text.
printhelp () {
  echo "Usage: docs/generator/checklinks.sh [-r OR -f <fname>] [OPTIONS]
  -r Recursively check all mds in all child directories, except docs/generator and node_modules (which is generatord by netlify)
  -f Just check the passed md file
  General Options:
  -x Execute commands. By default the script runs in test mode with no files changed by the script (results and fixes are just shown). Use -x to have it apply the changes.
  -u trys to follow URLs using curl
  -v Outputs debugging messages
  By default, nothing is actually checked. The following options tell it what to check:
  -a Check all link types
  -w Check wiki links (and just warn if you see one)
  -b Check absolute links to the netdata repo (and change them to relative). Only checks links to https://github.com/netdata/netdata/????/master*
  -l Check relative links to the netdata repo (and replace them with links that the html static site can live with, under docs/generator/src only)
  -e Check external links, outside the wiki or the repo (useless without adding the -u option, to verify that they're not broken)
  "
}

# Run (with -x) or just display (default) the shell command given in $1.
# Arguments: $1 - a complete shell command line, already quoted for the shell
fix () {
  if [ "$EXECUTE" -eq 0 ] ; then
    echo "-- SHOULD EXECUTE: $1"
  else
    dbg "-- EXECUTING: $1"
    # NOTE(review): eval on a string that embeds link text taken from the
    # markdown files. Callers must shell-quote every dynamic part (fix_link
    # does); do not pass raw, unquoted data here.
    eval "$1"
  fi
}

# Build the in-place sed command that rewrites the markdown link target
# "($1)" into "($2)" in file $3, and hand it to fix().
# The link text is escaped so that it is matched literally by sed (not as a
# regular expression), "&" and "/" are escaped in the replacement, and both
# the sed script and the file name are quoted for the eval in fix().
# Arguments: $1 - link as it appears in the file
#            $2 - replacement link
#            $3 - file to edit
fix_link () {
  local srch rplc script sq="'" esc="'\\''"
  srch=$(printf '%s\n' "$1" | sed -e 's/[]\/$*.^[]/\\&/g')
  rplc=$(printf '%s\n' "$2" | sed -e 's/[\/&]/\\&/g')
  script="s/($srch)/($rplc)/g"
  fix "sed -i '${script//$sq/$esc}' $(printf '%q' "$3")"
}

# Check an absolute link into the netdata repo and propose replacing it with
# a link relative to the file that contains it.
# Arguments: $1 - markdown file containing the link
#            $2 - the absolute link
#            $3 - the link text as it appears in the file
# Globals:   EXITCODE (set to 1 on error)
ck_netdata_absolute () {
  local f=$1 alnk=$2 lnkinfile=$3
  local fpath="." rlnk abspath="." rest relativelink

  # Test the URL first and act on the result immediately: checking $? after
  # other commands have run would test those commands instead.
  if ! testURL "$alnk" ; then
    echo "-- ERROR: $f - $alnk is a broken link"
    EXITCODE=1
    return
  fi

  if [[ $f =~ ^(.*)/([^/]*)$ ]] ; then
    fpath="${BASH_REMATCH[1]}"
    dbg "-- Current file is at $fpath"
  fi

  # Strip "https://github.com/netdata/netdata/<4 chars>/master/" to get the
  # path relative to the repository root.
  rlnk=$(printf '%s\n' "$alnk" | sed 's|https://github\.com/netdata/netdata/..../master/||g')
  case $rlnk in
    \#* ) dbg "-- (#somelink)" ;;
    */ ) dbg "-- # (path/)" ;;
    */\#* ) dbg "-- # (path/#somelink)" ;;
    */*.md ) dbg "-- # (path/filename.md)" ;;
    */*.md\#* ) dbg "-- # (path/filename.md#somelink)" ;;
    *\#* )
      dbg "-- # (path#somelink) -> (path/#somelink)"
      if [[ $rlnk =~ ^(.*)#(.*)$ ]] ; then
        dbg "-- $rlnk -> ${BASH_REMATCH[1]}/#${BASH_REMATCH[2]}"
        rlnk="${BASH_REMATCH[1]}/#${BASH_REMATCH[2]}"
      fi
      ;;
    * )
      if [ -f "$rlnk" ] ; then
        dbg "-- # (path/someotherfile) $rlnk"
      elif [ -d "$rlnk" ] ; then
        dbg "-- # (path) -> (path/)"
        rlnk="$rlnk/"
      else
        echo "-- ERROR: $f - $alnk is neither a file nor a directory. Giving up!"
        EXITCODE=1
        return
      fi
      ;;
  esac

  # Split the target into directory and remainder. A target without any "/"
  # lives at the repository root, so the directory stays ".".
  rest="$rlnk"
  if [[ $rlnk =~ ^(.*)/([^/]*)$ ]] ; then
    abspath="${BASH_REMATCH[1]}"
    rest="${BASH_REMATCH[2]}"
    dbg "-- Target file is at $abspath"
  fi

  if relativelink=$(realpath --relative-to="$fpath" "$abspath") ; then
    if [ "$relativelink" = "." ] ; then
      fix_link "$lnkinfile" "$rest" "$f"
    else
      fix_link "$lnkinfile" "$relativelink/$rest" "$f"
    fi
  else
    echo "-- ERROR: $f - Can't determine relative path of $alnk"
    EXITCODE=1
  fi
}

# Fetch URL $1 with curl when -u (TESTURLS=1) was given.
# Returns: 0 if URL testing is disabled or curl succeeded, 1 otherwise.
# NOTE(review): curl is run without --fail, so per the curl documentation an
# HTTP error status (404, 500, ...) still counts as success; only
# transport-level failures are reported. Confirm this is intended.
testURL () {
  if [ "$TESTURLS" -eq 0 ] ; then return 0 ; fi
  dbg "-- Testing URL $1"
  if curl -sS "$1" > /dev/null ; then
    return 0
  fi
  return 1
}

# Check that the header referred to by an internal link exists in a file.
# Arguments: $1 - file containing the link (used in the error message)
#            $2 - file that should contain the header
#            $3 - the "#anchor" part of the link
# Globals:   EXITCODE (set to 1 when the header is missing)
# Returns:   0 if found, 1 otherwise
testinternal () {
  local ff=${1} ifile=${2} ilnk=${3}
  local header=${ilnk//-/}

  dbg "-- Searching for \"$header\" in $ifile"
  # Drop punctuation, spaces and dashes from the file, then look for a line
  # made only of leading "#" characters followed by the anchor (which itself
  # starts with "#"), ignoring case.
  if tr -d '[],_.:? `' < "$ifile" | sed 's/-//g' | grep -i "^#*${header}\$" > /dev/null ; then
    dbg "-- $ilnk found in $ifile"
    return 0
  fi
  echo "-- ERROR: $ff - $ilnk header not found in file $ifile"
  EXITCODE=1
  return 1
}

# Check that the link target file exists.
# Arguments: $1 - file containing the link (used in the error message)
#            $2 - target file
# Globals:   EXITCODE (set to 1 when the target is missing)
# Returns:   0 if $2 is a regular file, 1 otherwise
testf () {
  local sf=$1 tf=$2

  if [ -f "$tf" ] ; then
    dbg "-- $tf exists"
    return 0
  fi
  echo "-- ERROR: $sf - $tf does not exist"
  EXITCODE=1
  return 1
}

# Check a relative link: first ensure that it works in the repo, then propose
# the rewrite needed for the html docs (applied to $GENERATOR_DIR/src/<file>).
# Arguments: $1 - markdown file containing the link
#            $2 - the relative link
# Globals:   GENERATOR_DIR (read), EXITCODE (set to 1 on error)
ck_netdata_relative () {
  local f=${1} rlnk=${2}
  local fpath="." fname="$f"
  local TRGT="" LNK="" s="" base anchor

  dbg "-- Checking relative link $rlnk"
  if [[ $f =~ ^(.*)/([^/]*)$ ]] ; then
    fpath="${BASH_REMATCH[1]}"
    fname="${BASH_REMATCH[2]}"
    dbg "-- Current file is at $fpath"
  else
    dbg "-- Current file is at root directory"
  fi

  # Cases to handle:
  # (#somelink)
  # (path/)
  # (path/#somelink)
  # (path/filename.md) -> htmldoc (path/filename/)
  # (path/filename.md#somelink) -> htmldoc (path/filename/#somelink)
  # (path#somelink) -> htmldoc (path/#somelink)
  # (path/someotherfile) -> htmldoc (absolutelink)
  # (path) -> htmldoc (path/)
  #
  # In every branch a non-README source file gets one extra "../" prepended
  # to the rewritten link.
  case "$rlnk" in
    \#* )
      dbg "-- # (#somelink)"
      testinternal "$f" "$f" "$rlnk"
      ;;
    */ )
      dbg "-- # (path/)"
      TRGT="$fpath/${rlnk}README.md"
      if testf "$f" "$TRGT" ; then
        if [ "$fname" != "README.md" ] ; then s="../$rlnk"; fi
      fi
      ;;
    */\#* )
      dbg "-- # (path/#somelink)"
      if [[ $rlnk =~ ^(.*)/#(.*)$ ]] ; then
        TRGT="$fpath/${BASH_REMATCH[1]}/README.md"
        LNK="#${BASH_REMATCH[2]}"
        dbg "-- Look for $LNK in $TRGT"
        if testf "$f" "$TRGT" && testinternal "$f" "$TRGT" "$LNK" ; then
          if [ "$fname" != "README.md" ] ; then s="../$rlnk"; fi
        fi
      fi
      ;;
    *.md )
      dbg "-- # (path/filename.md) -> htmldoc (path/filename/)"
      if testf "$f" "$fpath/$rlnk" ; then
        if [[ $rlnk =~ ^(.*)/(.*)\.md$ ]] ; then
          if [ "${BASH_REMATCH[2]}" = "README" ] ; then
            s="../${BASH_REMATCH[1]}/"
          else
            s="../${BASH_REMATCH[1]}/${BASH_REMATCH[2]}/"
          fi
          if [ "$fname" != "README.md" ] ; then s="../$s"; fi
        fi
      fi
      ;;
    *.md\#* )
      dbg "-- # (path/filename.md#somelink) -> htmldoc (path/filename/#somelink)"
      if [[ $rlnk =~ ^(.*)#(.*)$ ]] ; then
        TRGT="$fpath/${BASH_REMATCH[1]}"
        LNK="#${BASH_REMATCH[2]}"
        if testf "$f" "$TRGT" && testinternal "$f" "$TRGT" "$LNK" ; then
          # Match against this function's own argument, not a caller's
          # variable.
          if [[ $rlnk =~ ^(.*)/(.*)\.md#(.*)$ ]] ; then
            if [ "${BASH_REMATCH[2]}" = "README" ] ; then
              s="../${BASH_REMATCH[1]}/#${BASH_REMATCH[3]}"
            else
              s="../${BASH_REMATCH[1]}/${BASH_REMATCH[2]}/#${BASH_REMATCH[3]}"
            fi
            if [ "$fname" != "README.md" ] ; then s="../$s"; fi
          fi
        fi
      fi
      ;;
    *\#* )
      dbg "-- # (path#somelink) -> (path/#somelink)"
      if [[ $rlnk =~ ^(.*)#(.*)$ ]] ; then
        base="${BASH_REMATCH[1]}"
        anchor="${BASH_REMATCH[2]}"
        TRGT="$fpath/$base/README.md"
        LNK="#$anchor"
        if testf "$f" "$TRGT" && testinternal "$f" "$TRGT" "$LNK" ; then
          s="$base/#$anchor"
          if [ "$fname" != "README.md" ] ; then s="../$s"; fi
        fi
      fi
      ;;
    * )
      if [ -f "$fpath/$rlnk" ] ; then
        dbg "-- # (path/someotherfile) $rlnk"
        if [ "$fpath" = "." ] ; then
          s="https://github.com/netdata/netdata/tree/master/$rlnk"
        else
          s="https://github.com/netdata/netdata/tree/master/$fpath/$rlnk"
        fi
      elif [ -d "$fpath/$rlnk" ] ; then
        dbg "-- # (path) -> htmldoc (path/)"
        if testf "$f" "$fpath/$rlnk/README.md" ; then
          s="$rlnk/"
          if [ "$fname" != "README.md" ] ; then s="../$s"; fi
        fi
      else
        echo "-- ERROR: $f - $rlnk is neither a file or a directory. Giving up!"
        EXITCODE=1
      fi
      ;;
  esac

  if [[ -n $s ]] ; then
    fix_link "$rlnk" "$s" "$GENERATOR_DIR/src/$f"
  fi
}

# Scan markdown file $1 word by word for "](target)" links and dispatch each
# target to the check that matches its type, if that check was enabled.
# Globals: CHKWIKI, CHKABSOLUTE, CHKEXTERNAL, CHKRELATIVE (read),
#          EXITCODE (set to 1 on a broken external link)
checklinks () {
  local f=$1 word lnk
  local -a words

  dbg "Checking $f"
  # read -a splits each line on whitespace without glob-expanding it, so
  # markdown such as "* item" is not turned into file names. The extra test
  # keeps a final line that has no trailing newline.
  while read -r -a words || [ "${#words[@]}" -gt 0 ] ; do
    for word in "${words[@]}" ; do
      if [[ $word =~ .*\]\(([^\(\) ]*)\).* ]] ; then
        lnk="${BASH_REMATCH[1]}"
        if [ -z "$lnk" ] ; then continue ; fi
        dbg "-$lnk"
        case "$lnk" in
          mailto:* ) dbg "-- Mailto link, ignoring" ;;
          https://github.com/netdata/netdata/wiki* )
            dbg "-- Wiki Link $lnk"
            if [ "$CHKWIKI" -eq 1 ] ; then echo "-- WARNING: $f - $lnk points to the wiki. Please replace it manually" ; fi
            ;;
          https://github.com/netdata/netdata/????/master* )
            dbg "-- Absolute link $lnk"
            if [ "$CHKABSOLUTE" -eq 1 ] ; then ck_netdata_absolute "$f" "$lnk" "$lnk" ; fi
            ;;
          http* )
            dbg "-- External link $lnk"
            if [ "$CHKEXTERNAL" -eq 1 ] ; then
              if ! testURL "$lnk" ; then
                echo "-- ERROR: $f - $lnk is a broken link"
                EXITCODE=1
              fi
            fi
            ;;
          * )
            dbg "-- Relative link $lnk"
            if [ "$CHKRELATIVE" -eq 1 ] ; then ck_netdata_relative "$f" "$lnk" ; fi
            ;;
        esac
      fi
    done
  done < "$f"
}

# ---- Option parsing -------------------------------------------------------

TESTURLS=0
VERBOSE=0
RECURSIVE=0
EXECUTE=0
CHKWIKI=0
CHKABSOLUTE=0
CHKEXTERNAL=0
CHKRELATIVE=0
file=""
while getopts :f:rxuvwbela option
do
  case "$option" in
    f)
      file=$OPTARG
      ;;
    r)
      RECURSIVE=1
      ;;
    x)
      EXECUTE=1
      ;;
    u)
      TESTURLS=1
      ;;
    v)
      VERBOSE=1
      ;;
    w)
      CHKWIKI=1
      ;;
    b)
      CHKABSOLUTE=1
      ;;
    e)
      CHKEXTERNAL=1
      ;;
    l)
      CHKRELATIVE=1
      ;;
    a)
      CHKWIKI=1
      CHKABSOLUTE=1
      CHKEXTERNAL=1
      CHKRELATIVE=1
      ;;
    *)
      # Unknown option or missing argument for -f.
      printhelp
      exit 1
      ;;
  esac
done

# ---- Main -----------------------------------------------------------------

EXITCODE=0

# Exactly one of -r and -f must be given.
if [ -z "${file}" ] ; then
  if [ "$RECURSIVE" -eq 0 ] ; then
    printhelp
    exit 1
  fi
  # NUL-delimited find output on a dedicated descriptor: file names with
  # spaces survive, and stdin is left alone for the commands run per file.
  while IFS= read -r -d '' -u 3 mdfile ; do
    checklinks "$mdfile"
  done 3< <(find . -type d \( -path "./${GENERATOR_DIR}" -o -path ./node_modules \) -prune -o -name "*.md" -print0)
else
  if [ "$RECURSIVE" -eq 1 ] ; then
    printhelp
    exit 1
  fi
  checklinks "$file"
fi

exit "$EXITCODE"