diff options
Diffstat (limited to '')
-rw-r--r-- | src/bldprogs/scmparser.cpp | 1199 |
1 files changed, 1199 insertions, 0 deletions
diff --git a/src/bldprogs/scmparser.cpp b/src/bldprogs/scmparser.cpp new file mode 100644 index 00000000..bf1cdd6c --- /dev/null +++ b/src/bldprogs/scmparser.cpp @@ -0,0 +1,1199 @@ +/* $Id: scmparser.cpp $ */ +/** @file + * IPRT Testcase / Tool - Source Code Massager, Code Parsers. + */ + +/* + * Copyright (C) 2010-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include <iprt/assert.h> +#include <iprt/ctype.h> +#include <iprt/dir.h> +#include <iprt/env.h> +#include <iprt/file.h> +#include <iprt/errcore.h> +#include <iprt/getopt.h> +#include <iprt/initterm.h> +#include <iprt/mem.h> +#include <iprt/message.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/process.h> +#include <iprt/stream.h> +#include <iprt/string.h> + +#include "scm.h" + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +typedef size_t (*PFNISCOMMENT)(const char *pchLine, size_t cchLine, bool fSecond); + + +/** + * Callback for checking if C++ line comment. + */ +static size_t isCppLineComment(const char *pchLine, size_t cchLine, bool fSecond) +{ + if ( cchLine >= 2 + && pchLine[0] == '/' + && pchLine[1] == '/') + { + if (!fSecond) + return 2; + if (cchLine >= 3 && pchLine[2] == '/') + return 3; + } + return 0; +} + + +/** + * Callback for checking if hash comment. + */ +static size_t isHashComment(const char *pchLine, size_t cchLine, bool fSecond) +{ + if (cchLine >= 1 && *pchLine == '#') + { + if (!fSecond) + return 1; + if (cchLine >= 2 && pchLine[1] == '#') + return 2; + } + return 0; +} + + +/** + * Callback for checking if semicolon comment. + */ +static size_t isSemicolonComment(const char *pchLine, size_t cchLine, bool fSecond) +{ + if (cchLine >= 1 && *pchLine == ';') + { + if (!fSecond) + return 1; + if (cchLine >= 2 && pchLine[1] == ';') + return 2; + } + return 0; +} + + +/** Macro for checking for a XML comment start. */ +#define IS_XML_COMMENT_START(a_pch, a_off, a_cch) \ + ( (a_off) + 4 <= (a_cch) \ + && (a_pch)[(a_off) ] == '<' \ + && (a_pch)[(a_off) + 1] == '!' \ + && (a_pch)[(a_off) + 2] == '-' \ + && (a_pch)[(a_off) + 3] == '-' \ + && ((a_off) + 4 == (a_cch) || RT_C_IS_SPACE((a_pch)[(a_off) + 4])) ) + +/** Macro for checking for a XML comment end. */ +#define IS_XML_COMMENT_END(a_pch, a_off, a_cch) \ + ( (a_off) + 3 <= (a_cch) \ + && (a_pch)[(a_off) ] == '-' \ + && (a_pch)[(a_off) + 1] == '-' \ + && (a_pch)[(a_off) + 2] == '>') + + +/** Macro for checking for a batch file comment prefix. */ +#define IS_REM(a_pch, a_off, a_cch) \ + ( (a_off) + 3 <= (a_cch) \ + && ((a_pch)[(a_off) ] == 'R' || (a_pch)[(a_off) ] == 'r') \ + && ((a_pch)[(a_off) + 1] == 'E' || (a_pch)[(a_off) + 1] == 'e') \ + && ((a_pch)[(a_off) + 2] == 'M' || (a_pch)[(a_off) + 2] == 'm') \ + && ((a_off) + 3 == (a_cch) || RT_C_IS_SPACE((a_pch)[(a_off) + 3])) ) + + +/** + * Callback for checking if batch comment. + */ +static size_t isBatchComment(const char *pchLine, size_t cchLine, bool fSecond) +{ + if (!fSecond) + { + if (IS_REM(pchLine, 0, cchLine)) + return 3; + } + else + { + /* Check for the 2nd in "rem rem" lines. */ + if ( cchLine >= 4 + && RT_C_IS_SPACE(*pchLine) + && IS_REM(pchLine, 1, cchLine)) + return 4; + } + return 0; +} + +/** + * Callback for checking if SQL comment. + */ +static size_t isSqlComment(const char *pchLine, size_t cchLine, bool fSecond) +{ + if ( cchLine >= 2 + && pchLine[0] == '-' + && pchLine[1] == '-') + { + if (!fSecond) + return 2; + if ( cchLine >= 3 + && pchLine[2] == '-') + return 3; + } + return 0; +} + +/** + * Callback for checking if tick comment. + */ +static size_t isTickComment(const char *pchLine, size_t cchLine, bool fSecond) +{ + if (cchLine >= 1 && *pchLine == '\'') + { + if (!fSecond) + return 1; + if (cchLine >= 2 && pchLine[1] == '\'') + return 2; + } + return 0; +} + + +/** + * Common worker for enumeratePythonComments and enumerateSimpleLineComments. + * + * @returns IPRT status code. + * @param pIn The input stream. + * @param pfnIsComment Comment tester function. + * @param pfnCallback The callback. + * @param pvUser The user argument for the callback. + * @param ppchLine Pointer to the line variable. + * @param pcchLine Pointer to the line length variable. + * @param penmEol Pointer to the line ending type variable. + * @param piLine Pointer to the line number variable. + * @param poff Pointer to the line offset variable. On input this + * is positioned at the start of the comment. + */ +static int handleLineComment(PSCMSTREAM pIn, PFNISCOMMENT pfnIsComment, + PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser, + const char **ppchLine, size_t *pcchLine, PSCMEOL penmEol, + uint32_t *piLine, size_t *poff) +{ + /* Unpack input/output variables. */ + uint32_t iLine = *piLine; + const char *pchLine = *ppchLine; + size_t cchLine = *pcchLine; + size_t off = *poff; + SCMEOL enmEol = *penmEol; + + /* + * Take down the basic info about the comment. + */ + SCMCOMMENTINFO Info; + Info.iLineStart = iLine; + Info.iLineEnd = iLine; + Info.offStart = (uint32_t)off; + Info.offEnd = (uint32_t)cchLine; + + size_t cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false); + Assert(cchSkip > 0); + off += cchSkip; + + /* Determine comment type. */ + Info.enmType = kScmCommentType_Line; + char ch; + cchSkip = 1; + if ( off < cchLine + && ( (ch = pchLine[off]) == '!' + || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, true)) > 0) ) + { + unsigned ch2; + if ( off + cchSkip == cchLine + || RT_C_IS_SPACE(ch2 = pchLine[off + cchSkip]) ) + { + Info.enmType = ch != '!' ? kScmCommentType_Line_JavaDoc : kScmCommentType_Line_Qt; + off += cchSkip; + } + else if ( ch2 == '<' + && ( off + cchSkip + 1 == cchLine + || RT_C_IS_SPACE(pchLine[off + cchSkip + 1]) )) + { + Info.enmType = ch == '!' ? kScmCommentType_Line_JavaDoc_After : kScmCommentType_Line_Qt_After; + off += cchSkip + 1; + } + } + + /* + * Copy body of the first line. Like for C, we ignore a single space in the first comment line. + */ + if (off < cchLine && RT_C_IS_SPACE(pchLine[off])) + off++; + size_t cchBody = cchLine; + while (cchBody > off && RT_C_IS_SPACE(pchLine[cchBody - 1])) + cchBody--; + cchBody -= off; + size_t cbBodyAlloc = RT_MAX(_1K, RT_ALIGN_Z(cchBody + 64, 128)); + char *pszBody = (char *)RTMemAlloc(cbBodyAlloc); + if (!pszBody) + return VERR_NO_MEMORY; + memcpy(pszBody, &pchLine[off], cchBody); + pszBody[cchBody] = '\0'; + + Info.cBlankLinesBefore = cchBody == 0; + + /* + * Look for more comment lines and append them to the body. + */ + while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL) + { + iLine++; + + /* Skip leading spaces. */ + off = 0; + while (off < cchLine && RT_C_IS_SPACE(pchLine[off])) + off++; + + /* Check if it's a comment. */ + if ( off >= cchLine + || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false)) == 0) + break; + off += cchSkip; + + /* Split on doxygen comment start (if not already in one). */ + if ( Info.enmType == kScmCommentType_Line + && off + 1 < cchLine + && ( pfnIsComment(&pchLine[off], cchLine - off, true) > 0 + || ( pchLine[off + 1] == '!' + && ( off + 2 == cchLine + || pchLine[off + 2] != '!') ) ) ) + { + off -= cchSkip; + break; + } + + /* Append the body w/o trailing spaces and some leading ones. */ + if (off < cchLine && RT_C_IS_SPACE(pchLine[off])) + off++; + while (off < cchLine && off < Info.offStart + 3 && RT_C_IS_SPACE(pchLine[off])) + off++; + size_t cchAppend = cchLine; + while (cchAppend > off && RT_C_IS_SPACE(pchLine[cchAppend - 1])) + cchAppend--; + cchAppend -= off; + + size_t cchNewBody = cchBody + 1 + cchAppend; + if (cchNewBody >= cbBodyAlloc) + { + cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128)); + void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc); + if (pvNew) + pszBody = (char *)pvNew; + else + { + RTMemFree(pszBody); + return VERR_NO_MEMORY; + } + } + + if ( cchBody > 0 + || cchAppend > 0) + { + if (cchBody > 0) + pszBody[cchBody++] = '\n'; + memcpy(&pszBody[cchBody], &pchLine[off], cchAppend); + cchBody += cchAppend; + pszBody[cchBody] = '\0'; + } + else + Info.cBlankLinesBefore++; + + /* Advance. */ + Info.offEnd = (uint32_t)cchLine; + Info.iLineEnd = iLine; + } + + /* + * Strip trailing empty lines in the body. + */ + Info.cBlankLinesAfter = 0; + while (cchBody >= 1 && pszBody[cchBody - 1] == '\n') + { + Info.cBlankLinesAfter++; + pszBody[--cchBody] = '\0'; + } + + /* + * Do the callback and return. + */ + int rc = pfnCallback(&Info, pszBody, cchBody, pvUser); + + RTMemFree(pszBody); + + *piLine = iLine; + *ppchLine = pchLine; + *pcchLine = cchLine; + *poff = off; + *penmEol = enmEol; + return rc; +} + + + +/** + * Common string literal handler. + * + * @returns new pchLine value. + * @param pIn The input string. + * @param chType The quotation type. + * @param pchLine The current line. + * @param ppchLine Pointer to the line variable. + * @param pcchLine Pointer to the line length variable. + * @param penmEol Pointer to the line ending type variable. + * @param piLine Pointer to the line number variable. + * @param poff Pointer to the line offset variable. + */ +static const char *handleStringLiteral(PSCMSTREAM pIn, char chType, const char *pchLine, size_t *pcchLine, PSCMEOL penmEol, + uint32_t *piLine, size_t *poff) +{ + size_t off = *poff; + for (;;) + { + bool fEnd = false; + bool fEscaped = false; + size_t const cchLine = *pcchLine; + while (off < cchLine) + { + char ch = pchLine[off++]; + if (!fEscaped) + { + if (ch != chType) + { + if (ch != '\\') + { /* likely */ } + else + fEscaped = true; + } + else + { + fEnd = true; + break; + } + } + else + fEscaped = false; + } + if (fEnd) + break; + + /* next line */ + pchLine = ScmStreamGetLine(pIn, pcchLine, penmEol); + if (!pchLine) + break; + *piLine += 1; + off = 0; + } + + *poff = off; + return pchLine; +} + + +/** + * Deals with comments in C and C++ code. + * + * @returns VBox status code / callback return code. + * @param pIn The stream to parse. + * @param pfnCallback The callback. + * @param pvUser The user parameter for the callback. + */ +static int enumerateCStyleComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + AssertCompile('\'' < '/'); + AssertCompile('"' < '/'); + + int rcRet = VINF_SUCCESS; + uint32_t iLine = 0; + SCMEOL enmEol; + size_t cchLine; + const char *pchLine; + while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL) + { + size_t off = 0; + while (off < cchLine) + { + unsigned ch = pchLine[off++]; + if (ch > (unsigned)'/') + { /* not interesting */ } + else if (ch == '/') + { + if (off < cchLine) + { + ch = pchLine[off++]; + if (ch == '*') + { + /* + * Multiline comment. Find the end. + * + * Note! This is very similar to the python doc string handling further down. + */ + SCMCOMMENTINFO Info; + Info.iLineStart = iLine; + Info.offStart = (uint32_t)off - 2; + Info.iLineEnd = UINT32_MAX; + Info.offEnd = UINT32_MAX; + Info.cBlankLinesBefore = 0; + + /* Determine comment type (same as for line-comments). */ + Info.enmType = kScmCommentType_MultiLine; + if ( off < cchLine + && ( (ch = pchLine[off]) == '*' + || ch == '!') ) + { + unsigned ch2; + if ( off + 1 == cchLine + || RT_C_IS_SPACE(ch2 = pchLine[off + 1]) ) + { + Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc : kScmCommentType_MultiLine_Qt; + off += 1; + } + else if ( ch2 == '<' + && ( off + 2 == cchLine + || RT_C_IS_SPACE(pchLine[off + 2]) )) + { + Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc_After + : kScmCommentType_MultiLine_Qt_After; + off += 2; + } + } + + /* + * Copy the body and find the end of the multiline comment. + */ + size_t cbBodyAlloc = 0; + size_t cchBody = 0; + char *pszBody = NULL; + for (;;) + { + /* Parse the line up to the end-of-comment or end-of-line. */ + size_t offLineStart = off; + size_t offLastNonBlank = off; + size_t offFirstNonBlank = ~(size_t)0; + while (off < cchLine) + { + ch = pchLine[off++]; + if (ch != '*' || off >= cchLine || pchLine[off] != '/') + { + if (RT_C_IS_BLANK(ch)) + {/* kind of likely */} + else + { + offLastNonBlank = off - 1; + if (offFirstNonBlank != ~(size_t)0) + {/* likely */} + else if ( ch != '*' /* ignore continuation-asterisks */ + || off > Info.offStart + 1 + 1 + || off > cchLine + || ( off < cchLine + && !RT_C_IS_SPACE(pchLine[off])) + || pszBody == NULL) + offFirstNonBlank = off - 1; + } + } + else + { + Info.offEnd = (uint32_t)++off; + Info.iLineEnd = iLine; + break; + } + } + + /* Append line content to the comment body string. */ + size_t cchAppend; + if (offFirstNonBlank == ~(size_t)0) + cchAppend = 0; /* empty line */ + else + { + if (pszBody) + offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank); + else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */ + offLineStart++; + cchAppend = offLastNonBlank + 1 - offLineStart; + Assert(cchAppend <= cchLine); + } + + size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend; + if (cchNewBody >= cbBodyAlloc) + { + cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128)); + void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc); + if (pvNew) + pszBody = (char *)pvNew; + else + { + RTMemFree(pszBody); + return VERR_NO_MEMORY; + } + } + + if (cchBody > 0) /* no leading blank lines */ + pszBody[cchBody++] = '\n'; + else if (cchAppend == 0) + Info.cBlankLinesBefore++; + memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend); + cchBody += cchAppend; + pszBody[cchBody] = '\0'; + + /* Advance to the next line, if we haven't yet seen the end of this comment. */ + if (Info.iLineEnd != UINT32_MAX) + break; + pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + if (!pchLine) + { + Info.offEnd = (uint32_t)cchLine; + Info.iLineEnd = iLine; + break; + } + iLine++; + off = 0; + } + + /* Strip trailing empty lines in the body. */ + Info.cBlankLinesAfter = 0; + while (cchBody >= 1 && pszBody[cchBody - 1] == '\n') + { + Info.cBlankLinesAfter++; + pszBody[--cchBody] = '\0'; + } + + /* Do the callback. */ + int rc = pfnCallback(&Info, pszBody, cchBody, pvUser); + RTMemFree(pszBody); + if (RT_FAILURE(rc)) + return rc; + if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS) + rcRet = rc; + } + else if (ch == '/') + { + /* + * Line comment. Join the other line comment guys. + */ + off -= 2; + int rc = handleLineComment(pIn, isCppLineComment, pfnCallback, pvUser, + &pchLine, &cchLine, &enmEol, &iLine, &off); + if (RT_FAILURE(rc)) + return rc; + if (rcRet == VINF_SUCCESS) + rcRet = rc; + } + + if (!pchLine) + break; + } + } + else if (ch == '"') + { + /* + * String literal may include sequences that looks like comments. So, + * they needs special handling to avoid confusion. + */ + pchLine = handleStringLiteral(pIn, '"', pchLine, &cchLine, &enmEol, &iLine, &off); + } + /* else: We don't have to deal with character literal as these shouldn't + include comment-like sequences. */ + } /* for each character in the line */ + + iLine++; + } /* for each line in the stream */ + + int rcStream = ScmStreamGetStatus(pIn); + if (RT_SUCCESS(rcStream)) + return rcRet; + return rcStream; +} + + +/** + * Deals with comments in Python code. + * + * @returns VBox status code / callback return code. + * @param pIn The stream to parse. + * @param pfnCallback The callback. + * @param pvUser The user parameter for the callback. + */ +static int enumeratePythonComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + AssertCompile('#' < '\''); + AssertCompile('"' < '\''); + + int rcRet = VINF_SUCCESS; + uint32_t iLine = 0; + SCMEOL enmEol; + size_t cchLine; + const char *pchLine; + SCMCOMMENTINFO Info; + while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL) + { + size_t off = 0; + while (off < cchLine) + { + char ch = pchLine[off++]; + if ((unsigned char)ch > (unsigned char)'\'') + { /* not interesting */ } + else if (ch == '#') + { + /* + * Line comment. Join paths with the others. + */ + off -= 1; + int rc = handleLineComment(pIn, isHashComment, pfnCallback, pvUser, + &pchLine, &cchLine, &enmEol, &iLine, &off); + if (RT_FAILURE(rc)) + return rc; + if (rcRet == VINF_SUCCESS) + rcRet = rc; + + if (!pchLine) + break; + } + else if (ch == '"' || ch == '\'') + { + /* + * String literal may be doc strings and they may legally include hashes. + */ + const char chType = ch; + if ( off + 1 >= cchLine + || pchLine[off] != chType + || pchLine[off + 1] != chType) + pchLine = handleStringLiteral(pIn, chType, pchLine, &cchLine, &enmEol, &iLine, &off); + else + { + /* + * Doc string (/ long string). + * + * Note! This is very similar to the multiline C comment handling above. + */ + Info.iLineStart = iLine; + Info.offStart = (uint32_t)off - 1; + Info.iLineEnd = UINT32_MAX; + Info.offEnd = UINT32_MAX; + Info.cBlankLinesBefore = 0; + Info.enmType = kScmCommentType_DocString; + + off += 2; + + /* Copy the body and find the end of the doc string comment. */ + size_t cbBodyAlloc = 0; + size_t cchBody = 0; + char *pszBody = NULL; + for (;;) + { + /* Parse the line up to the end-of-comment or end-of-line. */ + size_t offLineStart = off; + size_t offLastNonBlank = off; + size_t offFirstNonBlank = ~(size_t)0; + bool fEscaped = false; + while (off < cchLine) + { + ch = pchLine[off++]; + if (!fEscaped) + { + if ( off + 1 >= cchLine + || ch != chType + || pchLine[off] != chType + || pchLine[off + 1] != chType) + { + if (RT_C_IS_BLANK(ch)) + {/* kind of likely */} + else + { + offLastNonBlank = off - 1; + if (offFirstNonBlank != ~(size_t)0) + {/* likely */} + else if ( ch != '*' /* ignore continuation-asterisks */ + || off > Info.offStart + 1 + 1 + || off > cchLine + || ( off < cchLine + && !RT_C_IS_SPACE(pchLine[off])) + || pszBody == NULL) + offFirstNonBlank = off - 1; + + if (ch != '\\') + {/* likely */ } + else + fEscaped = true; + } + } + else + { + off += 2; + Info.offEnd = (uint32_t)off; + Info.iLineEnd = iLine; + break; + } + } + else + fEscaped = false; + } + + /* Append line content to the comment body string. */ + size_t cchAppend; + if (offFirstNonBlank == ~(size_t)0) + cchAppend = 0; /* empty line */ + else + { + if (pszBody) + offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank); + else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */ + offLineStart++; + cchAppend = offLastNonBlank + 1 - offLineStart; + Assert(cchAppend <= cchLine); + } + + size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend; + if (cchNewBody >= cbBodyAlloc) + { + cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128)); + void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc); + if (pvNew) + pszBody = (char *)pvNew; + else + { + RTMemFree(pszBody); + return VERR_NO_MEMORY; + } + } + + if (cchBody > 0) /* no leading blank lines */ + pszBody[cchBody++] = '\n'; + else if (cchAppend == 0) + Info.cBlankLinesBefore++; + memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend); + cchBody += cchAppend; + pszBody[cchBody] = '\0'; + + /* Advance to the next line, if we haven't yet seen the end of this comment. */ + if (Info.iLineEnd != UINT32_MAX) + break; + pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + if (!pchLine) + { + Info.offEnd = (uint32_t)cchLine; + Info.iLineEnd = iLine; + break; + } + iLine++; + off = 0; + } + + /* Strip trailing empty lines in the body. */ + Info.cBlankLinesAfter = 0; + while (cchBody >= 1 && pszBody[cchBody - 1] == '\n') + { + Info.cBlankLinesAfter++; + pszBody[--cchBody] = '\0'; + } + + /* Do the callback. */ + int rc = pfnCallback(&Info, pszBody, cchBody, pvUser); + RTMemFree(pszBody); + if (RT_FAILURE(rc)) + return rc; + if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS) + rcRet = rc; + } + + if (!pchLine) + break; + } + /* else: We don't have to deal with character literal as these shouldn't + include comment-like sequences. */ + } /* for each character in the line */ + + iLine++; + } /* for each line in the stream */ + + int rcStream = ScmStreamGetStatus(pIn); + if (RT_SUCCESS(rcStream)) + return rcRet; + return rcStream; +} + + +/** + * Deals with XML comments. + * + * @returns VBox status code / callback return code. + * @param pIn The stream to parse. + * @param pfnCallback The callback. + * @param pvUser The user parameter for the callback. + */ +static int enumerateXmlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + int rcRet = VINF_SUCCESS; + uint32_t iLine = 0; + SCMEOL enmEol; + size_t cchLine; + const char *pchLine; + while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL) + { + size_t off = 0; + while (off < cchLine) + { + /* + * Skip leading blanks and check for start of XML comment. + */ + while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off])) + off++; + if (IS_XML_COMMENT_START(pchLine, off, cchLine)) + { + /* + * XML comment. Find the end. + * + * Note! This is very similar to the python doc string handling above. + */ + SCMCOMMENTINFO Info; + Info.iLineStart = iLine; + Info.offStart = (uint32_t)off; + Info.iLineEnd = UINT32_MAX; + Info.offEnd = UINT32_MAX; + Info.cBlankLinesBefore = 0; + Info.enmType = kScmCommentType_Xml; + + off += 4; + + /* + * Copy the body and find the end of the XML comment. + */ + size_t cbBodyAlloc = 0; + size_t cchBody = 0; + char *pszBody = NULL; + for (;;) + { + /* Parse the line up to the end-of-comment or end-of-line. */ + size_t offLineStart = off; + size_t offLastNonBlank = off; + size_t offFirstNonBlank = ~(size_t)0; + while (off < cchLine) + { + if (!IS_XML_COMMENT_END(pchLine, off, cchLine)) + { + char ch = pchLine[off++]; + if (RT_C_IS_BLANK(ch)) + {/* kind of likely */} + else + { + offLastNonBlank = off - 1; + if (offFirstNonBlank != ~(size_t)0) + {/* likely */} + else if ( (ch != '*' && ch != '#') /* ignore continuation-asterisks */ + || off > Info.offStart + 1 + 1 + || off > cchLine + || ( off < cchLine + && !RT_C_IS_SPACE(pchLine[off])) + || pszBody == NULL) + offFirstNonBlank = off - 1; + } + } + else + { + off += 3; + Info.offEnd = (uint32_t)off; + Info.iLineEnd = iLine; + break; + } + } + + /* Append line content to the comment body string. */ + size_t cchAppend; + if (offFirstNonBlank == ~(size_t)0) + cchAppend = 0; /* empty line */ + else + { + offLineStart = offFirstNonBlank; + cchAppend = offLastNonBlank + 1 - offLineStart; + Assert(cchAppend <= cchLine); + } + + size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend; + if (cchNewBody >= cbBodyAlloc) + { + cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128)); + void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc); + if (pvNew) + pszBody = (char *)pvNew; + else + { + RTMemFree(pszBody); + return VERR_NO_MEMORY; + } + } + + if (cchBody > 0) /* no leading blank lines */ + pszBody[cchBody++] = '\n'; + else if (cchAppend == 0) + Info.cBlankLinesBefore++; + memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend); + cchBody += cchAppend; + pszBody[cchBody] = '\0'; + + /* Advance to the next line, if we haven't yet seen the end of this comment. */ + if (Info.iLineEnd != UINT32_MAX) + break; + pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + if (!pchLine) + { + Info.offEnd = (uint32_t)cchLine; + Info.iLineEnd = iLine; + break; + } + iLine++; + off = 0; + } + + /* Strip trailing empty lines in the body. */ + Info.cBlankLinesAfter = 0; + while (cchBody >= 1 && pszBody[cchBody - 1] == '\n') + { + Info.cBlankLinesAfter++; + pszBody[--cchBody] = '\0'; + } + + /* Do the callback. */ + int rc = pfnCallback(&Info, pszBody, cchBody, pvUser); + RTMemFree(pszBody); + if (RT_FAILURE(rc)) + return rc; + if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS) + rcRet = rc; + } + else + off++; + } /* for each character in the line */ + + iLine++; + } /* for each line in the stream */ + + int rcStream = ScmStreamGetStatus(pIn); + if (RT_SUCCESS(rcStream)) + return rcRet; + return rcStream; +} + + +/** + * Deals with comments in DOS batch files. + * + * @returns VBox status code / callback return code. + * @param pIn The stream to parse. + * @param pfnCallback The callback. + * @param pvUser The user parameter for the callback. + */ +static int enumerateBatchComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + int rcRet = VINF_SUCCESS; + uint32_t iLine = 0; + SCMEOL enmEol; + size_t cchLine; + const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + while (pchLine != NULL) + { + /* + * Skip leading blanks and check for 'rem'. + * At the moment we do not parse '::label-comments'. + */ + size_t off = 0; + while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off])) + off++; + if (!IS_REM(pchLine, off, cchLine)) + { + iLine++; + pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + } + else + { + int rc = handleLineComment(pIn, isBatchComment, pfnCallback, pvUser, + &pchLine, &cchLine, &enmEol, &iLine, &off); + if (RT_FAILURE(rc)) + return rc; + if (rcRet == VINF_SUCCESS) + rcRet = rc; + } + } + + int rcStream = ScmStreamGetStatus(pIn); + if (RT_SUCCESS(rcStream)) + return rcRet; + return rcStream; +} + + +/** + * Deals with comments in SQL files. + * + * @returns VBox status code / callback return code. + * @param pIn The stream to parse. + * @param pfnCallback The callback. + * @param pvUser The user parameter for the callback. + */ +static int enumerateSqlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + int rcRet = VINF_SUCCESS; + uint32_t iLine = 0; + SCMEOL enmEol; + size_t cchLine; + const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + while (pchLine != NULL) + { + /* + * Skip leading blanks and check for '--'. + */ + size_t off = 0; + while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off])) + off++; + if ( cchLine < 2 + || pchLine[0] != '-' + || pchLine[1] != '-') + { + iLine++; + pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol); + } + else + { + int rc = handleLineComment(pIn, isSqlComment, pfnCallback, pvUser, + &pchLine, &cchLine, &enmEol, &iLine, &off); + if (RT_FAILURE(rc)) + return rc; + if (rcRet == VINF_SUCCESS) + rcRet = rc; + } + } + + int rcStream = ScmStreamGetStatus(pIn); + if (RT_SUCCESS(rcStream)) + return rcRet; + return rcStream; +} + + +/** + * Deals with simple line comments. + * + * @returns VBox status code / callback return code. + * @param pIn The stream to parse. + * @param chStart The start of comment character. + * @param pfnIsComment Comment tester function. + * @param pfnCallback The callback. + * @param pvUser The user parameter for the callback. + */ +static int enumerateSimpleLineComments(PSCMSTREAM pIn, char chStart, PFNISCOMMENT pfnIsComment, + PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + int rcRet = VINF_SUCCESS; + uint32_t iLine = 0; + SCMEOL enmEol; + size_t cchLine; + const char *pchLine; + while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL) + { + size_t off = 0; + while (off < cchLine) + { + char ch = pchLine[off++]; + if (ch != chStart) + { /* not interesting */ } + else + { + off -= 1; + int rc = handleLineComment(pIn, pfnIsComment, pfnCallback, pvUser, + &pchLine, &cchLine, &enmEol, &iLine, &off); + if (RT_FAILURE(rc)) + return rc; + if (rcRet == VINF_SUCCESS) + rcRet = rc; + + if (!pchLine) + break; + } + } /* for each character in the line */ + + iLine++; + } /* for each line in the stream */ + + int rcStream = ScmStreamGetStatus(pIn); + if (RT_SUCCESS(rcStream)) + return rcRet; + return rcStream; +} + + +/** + * Enumerates the comments in the given stream, calling @a pfnCallback for each. + * + * @returns IPRT status code. + * @param pIn The stream to parse. + * @param enmCommentStyle The comment style of the source stream. + * @param pfnCallback The function to call. + * @param pvUser User argument to the callback. + */ +int ScmEnumerateComments(PSCMSTREAM pIn, SCMCOMMENTSTYLE enmCommentStyle, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser) +{ + switch (enmCommentStyle) + { + case kScmCommentStyle_C: + return enumerateCStyleComments(pIn, pfnCallback, pvUser); + + case kScmCommentStyle_Python: + return enumeratePythonComments(pIn, pfnCallback, pvUser); + + case kScmCommentStyle_Semicolon: + return enumerateSimpleLineComments(pIn, ';', isSemicolonComment, pfnCallback, pvUser); + + case kScmCommentStyle_Hash: + return enumerateSimpleLineComments(pIn, '#', isHashComment, pfnCallback, pvUser); + + case kScmCommentStyle_Rem_Upper: + case kScmCommentStyle_Rem_Lower: + case kScmCommentStyle_Rem_Camel: + return enumerateBatchComments(pIn, pfnCallback, pvUser); + + case kScmCommentStyle_Sql: + return enumerateSqlComments(pIn, pfnCallback, pvUser); + + case kScmCommentStyle_Tick: + return enumerateSimpleLineComments(pIn, '\'', isTickComment, pfnCallback, pvUser); + + case kScmCommentStyle_Xml: + return enumerateXmlComments(pIn, pfnCallback, pvUser); + + default: + AssertFailedReturn(VERR_INVALID_PARAMETER); + } +} + |