/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace css; // INetURLObject /* The URI grammar (using RFC 2234 conventions). Constructs of the form {reference using rule2} stand for a rule matching the given rule1 specified in the given reference, encoded to URI syntax using rule2 (as specified in this URI grammar). ; RFC 1738, RFC 2396, RFC 2732, private login = [user [":" password] "@"] hostport user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~") password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~") hostport = host [":" port] host = incomplete-hostname / hostname / IPv4address / IPv6reference incomplete-hostname = *(domainlabel ".") domainlabel hostname = *(domainlabel ".") toplabel ["."] domainlabel = alphanum [*(alphanum / "-") alphanum] toplabel = ALPHA [*(alphanum / "-") alphanum] IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT IPv6reference = "[" hexpart [":" IPv4address] "]" hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) hexseq = hex4 *(":" hex4) hex4 = 1*4HEXDIG port = *DIGIT escaped = "%" HEXDIG HEXDIG reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]" mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~" alphanum = ALPHA / DIGIT unreserved = alphanum / mark uric = escaped / reserved / unreserved pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@" ; RFC 1738, RFC 2396 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]] segment = *pchar ; RFC 1738, RFC 2396 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]] segment = *(pchar / ";") ; RFC 1738, RFC 2396, file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)] segment = *pchar netbios-name = 1*{ using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")} ; RFC 2368, RFC 2396 mailto-url = "MAILTO:" [to] [headers] to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} headers = "?" header *("&" header) header = hname "=" hvalue hname = {RFC 822 using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY" hvalue = {RFC 822 using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} ; private (see RFC 1738, RFC 2396) vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]] segment = *(pchar / ";") ; private private-url = "PRIVATE:" path ["?" *uric] path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") ; private vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric] name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") ; private https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]] segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") ; private slot-url = "SLOT:" path ["?" *uric] path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") ; private macro-url = "MACRO:" path ["?" *uric] path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") ; private javascript-url = "JAVASCRIPT:" *uric ; RFC 2397 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric mediatype = [type "/" subtype] *(";" attribute "=" value) type = {RFC 2045 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")} subtype = {RFC 2045 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")} attribute = {RFC 2045 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")} value = {RFC 2045 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")} ; RFC 2392, RFC 2396 cid-url = "CID:" {RFC 822 using *uric} ; private vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar) reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") ; private uno-url = ".UNO:" path ["?" *uric] path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") ; private component-url = ".COMPONENT:" path ["?" *uric] path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") ; private vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric] reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") ; RFC 2255 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesct *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]] dn = {RFC 2253 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} attrdesc = {RFC 2251 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} filter = {RFC 2254 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} extension = ["!"] ["X-"] extoken ["=" exvalue] extoken = {RFC 2252 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} exvalue = {RFC 2251 using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} ; private db-url = "DB:" *uric ; private vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part opaque_part = uric_no_slash *uric uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," ; RFC 1738 telnet-url = "TELNET://" login ["/"] ; private vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part opaque_part = uric_no_slash *uric uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," ; private vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment) segment = *pchar ; private unknown-url = scheme ":" 1*uric scheme = ALPHA *(alphanum / "+" / "-" / ".") ; private (http://ubiqx.org/cifs/Appendix-D.html): smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]] segment = *(pchar / ";") */ sal_Int32 INetURLObject::SubString::clear() { sal_Int32 nDelta = -m_nLength; m_nBegin = -1; m_nLength = 0; return nDelta; } sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString, std::u16string_view rSubString) { sal_Int32 nDelta = rSubString.size() - m_nLength; rString.remove(m_nBegin, m_nLength); rString.insert(m_nBegin, rSubString); m_nLength = rSubString.size(); return nDelta; } sal_Int32 INetURLObject::SubString::set(OUString & rString, std::u16string_view rSubString) { sal_Int32 nDelta = rSubString.size() - m_nLength; rString = OUString::Concat(rString.subView(0, m_nBegin)) + rSubString + rString.subView(m_nBegin + m_nLength); m_nLength = rSubString.size(); return nDelta; } sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString, std::u16string_view rSubString, sal_Int32 nTheBegin) { m_nBegin = nTheBegin; return set(rString, rSubString); } inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta) { if (isPresent()) m_nBegin = m_nBegin + nDelta; } int INetURLObject::SubString::compare(SubString const & rOther, OUStringBuffer const & rThisString, OUStringBuffer const & rOtherString) const { sal_Int32 len = std::min(m_nLength, rOther.m_nLength); sal_Unicode const * p1 = rThisString.getStr() + m_nBegin; sal_Unicode const * end = p1 + len; sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin; while (p1 != end) { if (*p1 < *p2) { return -1; } else if (*p1 > *p2) { return 1; } ++p1; ++p2; } return m_nLength < rOther.m_nLength ? -1 : m_nLength > rOther.m_nLength ? 1 : 0; } struct INetURLObject::SchemeInfo { rtl::OUStringConstExpr m_sScheme; char const * m_pPrefix; bool m_bAuthority; bool m_bUser; bool m_bAuth; bool m_bPassword; bool m_bHost; bool m_bPort; bool m_bHierarchical; bool m_bQuery; }; struct INetURLObject::PrefixInfo { enum class Kind { Official, Internal, External }; // order is important! char const * m_pPrefix; char const * m_pTranslatedPrefix; INetProtocol m_eScheme; Kind m_eKind; }; // static inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo(INetProtocol eTheScheme) { static constexpr OUStringLiteral EMPTY = u""; static constexpr OUStringLiteral FTP = u"ftp"; static constexpr OUStringLiteral HTTP = u"http"; static constexpr OUStringLiteral FILE1 = u"file"; // because FILE is already defined static constexpr OUStringLiteral MAILTO = u"mailto"; static constexpr OUStringLiteral VND_WEBDAV = u"vnd.sun.star.webdav"; static constexpr OUStringLiteral PRIVATE = u"private"; static constexpr OUStringLiteral VND_HELP = u"vnd.sun.star.help"; static constexpr OUStringLiteral HTTPS = u"https"; static constexpr OUStringLiteral SLOT = u"slot"; static constexpr OUStringLiteral MACRO = u"macro"; static constexpr OUStringLiteral JAVASCRIPT = u"javascript"; static constexpr OUStringLiteral DATA = u"data"; static constexpr OUStringLiteral CID = u"cid"; static constexpr OUStringLiteral VND_HIER = u"vnd.sun.star.hier"; static constexpr OUStringLiteral UNO = u".uno"; static constexpr OUStringLiteral COMPONENT = u".component"; static constexpr OUStringLiteral VND_PKG = u"vnd.sun.star.pkg"; static constexpr OUStringLiteral LDAP = u"ldap"; static constexpr OUStringLiteral DB = u"db"; static constexpr OUStringLiteral VND_CMD = u"vnd.sun.star.cmd"; static constexpr OUStringLiteral TELNET = u"telnet"; static constexpr OUStringLiteral VND_EXPAND = u"vnd.sun.star.expand"; static constexpr OUStringLiteral VND_TDOC = u"vnd.sun.star.tdoc"; static constexpr OUStringLiteral SMB = u"smb"; static constexpr OUStringLiteral HID = u"hid"; static constexpr OUStringLiteral SFTP = u"sftp"; static constexpr OUStringLiteral VND_CMIS = u"vnd.libreoffice.cmis"; static o3tl::enumarray const map = { SchemeInfo{ EMPTY, "", false, false, false, false, false, false, false, false}, SchemeInfo{ FTP, "ftp://", true, true, false, true, true, true, true, false}, SchemeInfo{ HTTP, "http://", true, false, false, false, true, true, true, true}, SchemeInfo{ FILE1, "file://", true, false, false, false, true, false, true, false}, SchemeInfo{ MAILTO, "mailto:", false, false, false, false, false, false, false, true}, SchemeInfo{ VND_WEBDAV, "vnd.sun.star.webdav://", true, false, false, false, true, true, true, true}, SchemeInfo{ PRIVATE, "private:", false, false, false, false, false, false, false, true}, SchemeInfo{ VND_HELP, "vnd.sun.star.help://", true, false, false, false, false, false, true, true}, SchemeInfo{ HTTPS, "https://", true, false, false, false, true, true, true, true}, SchemeInfo{ SLOT, "slot:", false, false, false, false, false, false, false, true}, SchemeInfo{ MACRO, "macro:", false, false, false, false, false, false, false, true}, SchemeInfo{ JAVASCRIPT, "javascript:", false, false, false, false, false, false, false, false}, SchemeInfo{ DATA, "data:", false, false, false, false, false, false, false, false}, SchemeInfo{ CID, "cid:", false, false, false, false, false, false, false, false}, SchemeInfo{ VND_HIER, "vnd.sun.star.hier:", true, false, false, false, false, false, true, false}, SchemeInfo{ UNO, ".uno:", false, false, false, false, false, false, false, true}, SchemeInfo{ COMPONENT, ".component:", false, false, false, false, false, false, false, true}, SchemeInfo{ VND_PKG, "vnd.sun.star.pkg://", true, false, false, false, false, false, true, true}, SchemeInfo{ LDAP, "ldap://", true, false, false, false, true, true, false, true}, SchemeInfo{ DB, "db:", false, false, false, false, false, false, false, false}, SchemeInfo{ VND_CMD, "vnd.sun.star.cmd:", false, false, false, false, false, false, false, false}, SchemeInfo{ TELNET, "telnet://", true, true, false, true, true, true, true, false}, SchemeInfo{ VND_EXPAND, "vnd.sun.star.expand:", false, false, false, false, false, false, false, false}, SchemeInfo{ VND_TDOC, "vnd.sun.star.tdoc:", false, false, false, false, false, false, true, false}, SchemeInfo{ EMPTY, "", false, false, false, false, true, true, true, false }, SchemeInfo{ SMB, "smb://", true, true, false, true, true, true, true, true}, SchemeInfo{ HID, "hid:", false, false, false, false, false, false, false, true}, SchemeInfo{ SFTP, "sftp://", true, true, false, true, true, true, true, true}, SchemeInfo{ VND_CMIS, "vnd.libreoffice.cmis://", true, true, false, false, true, false, true, true} }; return map[eTheScheme]; }; inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const { return getSchemeInfo(m_eScheme); } namespace { sal_Unicode getHexDigit(sal_uInt32 nWeight) { assert(nWeight < 16); static const sal_Unicode aDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; return aDigits[nWeight]; } } // static inline void INetURLObject::appendEscape(OUStringBuffer & rTheText, sal_uInt32 nOctet) { rTheText.append( '%' ); rTheText.append( getHexDigit(nOctet >> 4) ); rTheText.append( getHexDigit(nOctet & 15) ); } namespace { enum { PA = INetURLObject::PART_USER_PASSWORD, PD = INetURLObject::PART_FPATH, PE = INetURLObject::PART_AUTHORITY, PF = INetURLObject::PART_REL_SEGMENT_EXTRA, PG = INetURLObject::PART_URIC, PH = INetURLObject::PART_HTTP_PATH, PI = INetURLObject::PART_MESSAGE_ID_PATH, PJ = INetURLObject::PART_MAILTO, PK = INetURLObject::PART_PATH_BEFORE_QUERY, PL = INetURLObject::PART_PCHAR, PM = INetURLObject::PART_VISIBLE, PN = INetURLObject::PART_VISIBLE_NONSPECIAL, PO = INetURLObject::PART_UNO_PARAM_VALUE, PP = INetURLObject::PART_UNAMBIGUOUS, PQ = INetURLObject::PART_URIC_NO_SLASH, PR = INetURLObject::PART_HTTP_QUERY, }; sal_uInt32 const aMustEncodeMap[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */ PP, /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* " */ PM+PN +PP, /* # */ PM, /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* % */ PM, /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR, /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR, /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR, /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO, /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR, /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR, /* < */ +PI +PM+PN +PP, /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR, /* > */ +PI +PM+PN +PP, /* ? */ +PG +PM +PO +PQ, /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* [ */ PG +PM+PN+PO, /* \ */ +PM+PN +PP, /* ] */ PG +PM+PN+PO, /* ^ */ PM+PN +PP, /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ` */ PM+PN +PP, /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* { */ PM+PN +PP, /* | */ +PM+PN +PP, /* } */ PM+PN +PP, /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ, 0 }; bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart) { return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart); } } void INetURLObject::setInvalid() { m_aAbsURIRef.setLength(0); m_eScheme = INetProtocol::NotValid; m_aScheme.clear(); m_aUser.clear(); m_aAuth.clear(); m_aHost.clear(); m_aPort.clear(); m_aPath.clear(); m_aQuery.clear(); m_aFragment.clear(); } namespace { std::unique_ptr memoryStream( void const * data, sal_Int32 length) { std::unique_ptr b( new char[length]); memcpy(b.get(), data, length); std::unique_ptr s( new SvMemoryStream(b.get(), length, StreamMode::READ)); s->ObjectOwnsMemory(true); b.release(); return s; } } std::unique_ptr INetURLObject::getData() const { if( GetProtocol() != INetProtocol::Data ) { return nullptr; } OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 ); sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath ); sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr ? 0 : pSkippedMediatype-sURLPath.getStr(); if (sURLPath.match(",", nCharactersSkipped)) { nCharactersSkipped += strlen(","); OString sURLEncodedData( sURLPath.getStr() + nCharactersSkipped, sURLPath.getLength() - nCharactersSkipped, RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS); return memoryStream( sURLEncodedData.getStr(), sURLEncodedData.getLength()); } else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped)) { nCharactersSkipped += strlen(";base64,"); std::u16string_view sBase64Data = sURLPath.subView( nCharactersSkipped ); css::uno::Sequence< sal_Int8 > aDecodedData; if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data) == sBase64Data.size()) { return memoryStream( aDecodedData.getArray(), aDecodedData.getLength()); } } return nullptr; } namespace { FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin, sal_Unicode const * pEnd, FSysStyle eStyle) { DBG_ASSERT(eStyle & (FSysStyle::Unix | FSysStyle::Dos), "guessFSysStyleByCounting(): Bad style"); DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(), "guessFSysStyleByCounting(): Too big"); sal_Int32 nSlashCount = (eStyle & FSysStyle::Unix) ? 0 : std::numeric_limits< sal_Int32 >::min(); sal_Int32 nBackslashCount = (eStyle & FSysStyle::Dos) ? 0 : std::numeric_limits< sal_Int32 >::min(); while (pBegin != pEnd) switch (*pBegin++) { case '/': ++nSlashCount; break; case '\\': ++nBackslashCount; break; } return nSlashCount >= nBackslashCount ? FSysStyle::Unix : FSysStyle::Dos; } OUString parseScheme( sal_Unicode const ** begin, sal_Unicode const * end, sal_uInt32 fragmentDelimiter) { sal_Unicode const * p = *begin; if (p != end && rtl::isAsciiAlpha(*p)) { do { ++p; } while (p != end && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-' || *p == '.')); // #i34835# To avoid problems with Windows file paths like "C:\foo", // do not accept generic schemes that are only one character long: if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter && p - *begin >= 2) { OUString scheme( OUString(*begin, p - *begin).toAsciiLowerCase()); *begin = p + 1; return scheme; } } return OUString(); } } bool INetURLObject::setAbsURIRef(std::u16string_view rTheAbsURIRef, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart, FSysStyle eStyle) { sal_Unicode const * pPos = rTheAbsURIRef.data(); sal_Unicode const * pEnd = pPos + rTheAbsURIRef.size(); setInvalid(); sal_uInt32 nFragmentDelimiter = '#'; m_aAbsURIRef.setLength(0); // Parse : sal_Unicode const * p = pPos; PrefixInfo const * pPrefix = getPrefix(p, pEnd); if (pPrefix) { pPos = p; m_eScheme = pPrefix->m_eScheme; char const * pTemp = pPrefix->m_eKind >= PrefixInfo::Kind::External ? pPrefix->m_pTranslatedPrefix : pPrefix->m_pPrefix; m_aAbsURIRef.appendAscii(pTemp); m_aScheme = SubString( 0, strstr(pTemp, ":") - pTemp ); } else { if (bSmart) { // For scheme detection, the first (if any) of the following // productions that matches the input string (and for which the // appropriate style bit is set in eStyle, if applicable) // determines the scheme. The productions use the auxiliary rules // domain = label *("." label) // label = alphanum [*(alphanum / "-") alphanum] // alphanum = ALPHA / DIGIT // IPv6reference = "[" IPv6address "]" // IPv6address = hexpart [":" IPv4address] // IPv4address = 1*3DIGIT 3("." 1*3DIGIT) // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) // hexseq = hex4 *(":" hex4) // hex4 = 1*4HEXDIG // UCS4 = // 1st Production (known scheme; handled by the "if (pPrefix)" branch above): // ":" *UCS4 // 2nd Production (mailto): // domain "@" domain // 3rd Production (ftp): // "FTP" 2*("." label) ["/" *UCS4] // 4th Production (http): // label 2*("." label) ["/" *UCS4] // 5th Production (file): // "//" (domain / IPv6reference) ["/" *UCS4] // 6th Production (Unix file): // "/" *UCS4 // 7th Production (UNC file; FSysStyle::Dos only): // "\\" domain ["\" *UCS4] // 8th Production (Unix-like DOS file; FSysStyle::Dos only): // ALPHA ":" ["/" *UCS4] // 9th Production (DOS file; FSysStyle::Dos only): // ALPHA ":" ["\" *UCS4] // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code // after this else branch): // ":" *UCS4 // For the 'non URL' file productions 6--9, the interpretation of // the input as a (degenerate) URI is turned off, i.e., escape // sequences and fragments are never detected as such, but are // taken as literal characters. sal_Unicode const * p1 = pPos; if (eStyle & FSysStyle::Dos && pEnd - p1 >= 2 && rtl::isAsciiAlpha(p1[0]) && p1[1] == ':' && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\')) { m_eScheme = INetProtocol::File; // 8th, 9th eMechanism = EncodeMechanism::All; nFragmentDelimiter = 0x80000000; } else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/') { p1 += 2; if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd)) && (p1 == pEnd || *p1 == '/')) m_eScheme = INetProtocol::File; // 5th } else if (p1 != pEnd && *p1 == '/') { m_eScheme = INetProtocol::File; // 6th eMechanism = EncodeMechanism::All; nFragmentDelimiter = 0x80000000; } else if (eStyle & FSysStyle::Dos && pEnd - p1 >= 2 && p1[0] == '\\' && p1[1] == '\\') { p1 += 2; sal_Int32 n = rtl_ustr_indexOfChar_WithLength( p1, pEnd - p1, '\\'); sal_Unicode const * pe = n == -1 ? pEnd : p1 + n; if ( parseHostOrNetBiosName( p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW, true, nullptr) || (scanDomain(p1, pe) > 0 && p1 == pe) ) { m_eScheme = INetProtocol::File; // 7th eMechanism = EncodeMechanism::All; nFragmentDelimiter = 0x80000000; } } else { sal_Unicode const * pDomainEnd = p1; sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd); if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@') { ++pDomainEnd; if (scanDomain(pDomainEnd, pEnd) > 0 && pDomainEnd == pEnd) m_eScheme = INetProtocol::Mailto; // 2nd } else if (nLabels >= 3 && (pDomainEnd == pEnd || *pDomainEnd == '/')) m_eScheme = pDomainEnd - p1 >= 4 && (p1[0] == 'f' || p1[0] == 'F') && (p1[1] == 't' || p1[1] == 'T') && (p1[2] == 'p' || p1[2] == 'P') && p1[3] == '.' ? INetProtocol::Ftp : INetProtocol::Http; // 3rd, 4th } } OUString aSynScheme; if (m_eScheme == INetProtocol::NotValid) { sal_Unicode const * p1 = pPos; aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter); if (!aSynScheme.isEmpty()) { if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1)) { // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)" // branch above); but a known scheme is defined in m_eSmartScheme. If this // scheme may have a port in authority component, then avoid misinterpreting // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with // 123/baz rootless path. For now, do not try to handle possible colons in // user information, require such ambiguous URLs to have explicit scheme part. // Also ignore possibility of empty port. const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme); if (rInfo.m_bAuthority && rInfo.m_bPort) { // Make sure that all characters from colon to [/?#] or to EOL are digits. // Or maybe make it simple, and just assume that "xyz:1..." is more likely // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."? sal_Unicode const* p2 = p1 + 1; while (p2 != pEnd && rtl::isAsciiDigit(*p2)) ++p2; if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#') m_eScheme = m_eSmartScheme; } } if (m_eScheme == INetProtocol::NotValid) { m_eScheme = INetProtocol::Generic; pPos = p1; } } } if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd && *pPos != nFragmentDelimiter) { m_eScheme = m_eSmartScheme; } if (m_eScheme == INetProtocol::NotValid) { setInvalid(); return false; } if (m_eScheme != INetProtocol::Generic) { aSynScheme = static_cast(getSchemeInfo().m_sScheme); } m_aScheme.set(m_aAbsURIRef, aSynScheme, m_aAbsURIRef.getLength()); m_aAbsURIRef.append(':'); } sal_uInt32 nSegmentDelimiter = '/'; sal_uInt32 nAltSegmentDelimiter = 0x80000000; bool bSkippedInitialSlash = false; // Parse //;AUTH=@: or // //:@: or // // if (getSchemeInfo().m_bAuthority) { sal_Unicode const * pUserInfoBegin = nullptr; sal_Unicode const * pUserInfoEnd = nullptr; sal_Unicode const * pHostPortBegin = nullptr; sal_Unicode const * pHostPortEnd = nullptr; switch (m_eScheme) { case INetProtocol::VndSunStarHelp: { if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/') { setInvalid(); return false; } m_aAbsURIRef.append("//"); OUStringBuffer aSynAuthority; while (pPos < pEnd && *pPos != '/' && *pPos != '?' && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAuthority, nUTF32, eEscapeType, PART_AUTHORITY, eCharset, false); } m_aHost.set(m_aAbsURIRef, aSynAuthority, m_aAbsURIRef.getLength()); // misusing m_aHost to store the authority break; } case INetProtocol::VndSunStarHier: { if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') { pPos += 2; m_aAbsURIRef.append("//"); OUStringBuffer aSynAuthority; while (pPos < pEnd && *pPos != '/' && *pPos != '?' && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAuthority, nUTF32, eEscapeType, PART_AUTHORITY, eCharset, false); } if (aSynAuthority.isEmpty()) { setInvalid(); return false; } m_aHost.set(m_aAbsURIRef, aSynAuthority, m_aAbsURIRef.getLength()); // misusing m_aHost to store the authority } break; } case INetProtocol::VndSunStarPkg: case INetProtocol::Cmis: { if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/') { setInvalid(); return false; } m_aAbsURIRef.append("//"); OUStringBuffer aSynUser(128); bool bHasUser = false; while (pPos < pEnd && *pPos != '@' && *pPos != '/' && *pPos != '?' && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynUser, nUTF32, eEscapeType, PART_USER_PASSWORD, eCharset, false); bHasUser = *pPos == '@'; } OUStringBuffer aSynAuthority(64); if ( !bHasUser ) { aSynAuthority = aSynUser; } else { m_aUser.set(m_aAbsURIRef, aSynUser, m_aAbsURIRef.getLength()); m_aAbsURIRef.append("@"); ++pPos; while (pPos < pEnd && *pPos != '/' && *pPos != '?' && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAuthority, nUTF32, eEscapeType, PART_AUTHORITY, eCharset, false); } } if (aSynAuthority.isEmpty()) { setInvalid(); return false; } m_aHost.set(m_aAbsURIRef, aSynAuthority, m_aAbsURIRef.getLength()); // misusing m_aHost to store the authority break; } case INetProtocol::File: if (bSmart) { // The first of the following seven productions that // matches the rest of the input string (and for which the // appropriate style bit is set in eStyle, if applicable) // determines the used notation. The productions use the // auxiliary rules // domain = label *("." label) // label = alphanum [*(alphanum / "-") alphanum] // alphanum = ALPHA / DIGIT // IPv6reference = "[" IPv6address "]" // IPv6address = hexpart [":" IPv4address] // IPv4address = 1*3DIGIT 3("." 1*3DIGIT) // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) // hexseq = hex4 *(":" hex4) // hex4 = 1*4HEXDIG // path = // UCS4 = // 1st Production (URL): // "//" [domain / IPv6reference] ["/" *path] // ["#" *UCS4] // becomes // "file://" domain "/" *path ["#" *UCS4] if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') { sal_Unicode const * p1 = pPos + 2; while (p1 != pEnd && *p1 != '/' && *p1 != nFragmentDelimiter) { ++p1; } if (parseHostOrNetBiosName( pPos + 2, p1, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW, true, nullptr)) { m_aAbsURIRef.append("//"); pHostPortBegin = pPos + 2; pHostPortEnd = p1; pPos = p1; break; } } // 2nd Production (MS IE generated 1; FSysStyle::Dos only): // "//" ALPHA ":" ["/" *path] ["#" *UCS4] // becomes // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] // replacing "\" by "/" within <*path> // 3rd Production (MS IE generated 2; FSysStyle::Dos only): // "//" ALPHA ":" ["\" *path] ["#" *UCS4] // becomes // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] // replacing "\" by "/" within <*path> // 4th Production (miscounted slashes): // "//" *path ["#" *UCS4] // becomes // "file:///" *path ["#" *UCS4] if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') { m_aAbsURIRef.append("//"); pPos += 2; bSkippedInitialSlash = true; if ((eStyle & FSysStyle::Dos) && pEnd - pPos >= 2 && rtl::isAsciiAlpha(pPos[0]) && pPos[1] == ':' && (pEnd - pPos == 2 || pPos[2] == '/' || pPos[2] == '\\')) nAltSegmentDelimiter = '\\'; break; } // 5th Production (Unix): // "/" *path ["#" *UCS4] // becomes // "file:///" *path ["#" *UCS4] if (pPos < pEnd && *pPos == '/') { m_aAbsURIRef.append("//"); break; } // 6th Production (UNC; FSysStyle::Dos only): // "\\" domain ["\" *path] ["#" *UCS4] // becomes // "file://" domain "/" *path ["#" *UCS4] // replacing "\" by "/" within <*path> if (eStyle & FSysStyle::Dos && pEnd - pPos >= 2 && pPos[0] == '\\' && pPos[1] == '\\') { sal_Unicode const * p1 = pPos + 2; sal_Unicode const * pe = p1; while (pe < pEnd && *pe != '\\' && *pe != nFragmentDelimiter) { ++pe; } if ( parseHostOrNetBiosName( p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW, true, nullptr) || (scanDomain(p1, pe) > 0 && p1 == pe) ) { m_aAbsURIRef.append("//"); pHostPortBegin = pPos + 2; pHostPortEnd = pe; pPos = pe; nSegmentDelimiter = '\\'; break; } } // 7th Production (Unix-like DOS; FSysStyle::Dos only): // ALPHA ":" ["/" *path] ["#" *UCS4] // becomes // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] // replacing "\" by "/" within <*path> // 8th Production (DOS; FSysStyle::Dos only): // ALPHA ":" ["\" *path] ["#" *UCS4] // becomes // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] // replacing "\" by "/" within <*path> if (eStyle & FSysStyle::Dos && pEnd - pPos >= 2 && rtl::isAsciiAlpha(pPos[0]) && pPos[1] == ':' && (pEnd - pPos == 2 || pPos[2] == '/' || pPos[2] == '\\')) { m_aAbsURIRef.append("//"); nAltSegmentDelimiter = '\\'; bSkippedInitialSlash = true; break; } // 9th Production (any): // *path ["#" *UCS4] // becomes // "file:///" *path ["#" *UCS4] // replacing the delimiter by "/" within <*path>. The // delimiter is that character from the set { "/", "\"} // which appears most often in <*path> (if FSysStyle::Unix // is not among the style bits, "/" is removed from the // set; if FSysStyle::Dos is not among the style bits, "\" is // removed from the set). If two or // more characters appear the same number of times, the // character mentioned first in that set is chosen. If // the first character of <*path> is the delimiter, that // character is not copied if (eStyle & (FSysStyle::Unix | FSysStyle::Dos)) { m_aAbsURIRef.append("//"); switch (guessFSysStyleByCounting(pPos, pEnd, eStyle)) { case FSysStyle::Unix: nSegmentDelimiter = '/'; break; case FSysStyle::Dos: nSegmentDelimiter = '\\'; break; default: OSL_FAIL( "INetURLObject::setAbsURIRef():" " Bad guessFSysStyleByCounting"); break; } bSkippedInitialSlash = pPos != pEnd && *pPos != nSegmentDelimiter; break; } } [[fallthrough]]; default: { // For INetProtocol::File, allow an empty authority ("//") to be // missing if the following path starts with an explicit "/" // (Java is notorious in generating such file URLs, so be // liberal here): if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') pPos += 2; else if (!bSmart && !(m_eScheme == INetProtocol::File && pPos != pEnd && *pPos == '/')) { setInvalid(); return false; } m_aAbsURIRef.append("//"); sal_Unicode const * pAuthority = pPos; sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000; while (pPos < pEnd && *pPos != '/' && *pPos != c && *pPos != nFragmentDelimiter) ++pPos; if (getSchemeInfo().m_bUser) if (getSchemeInfo().m_bHost) { sal_Unicode const * p1 = pAuthority; while (p1 < pPos && *p1 != '@') ++p1; if (p1 == pPos) { pHostPortBegin = pAuthority; pHostPortEnd = pPos; } else { pUserInfoBegin = pAuthority; pUserInfoEnd = p1; pHostPortBegin = p1 + 1; pHostPortEnd = pPos; } } else { pUserInfoBegin = pAuthority; pUserInfoEnd = pPos; } else if (getSchemeInfo().m_bHost) { pHostPortBegin = pAuthority; pHostPortEnd = pPos; } else if (pPos != pAuthority) { setInvalid(); return false; } break; } } if (pUserInfoBegin) { Part ePart = PART_USER_PASSWORD; bool bSupportsPassword = getSchemeInfo().m_bPassword; bool bSupportsAuth = !bSupportsPassword && getSchemeInfo().m_bAuth; bool bHasAuth = false; OUStringBuffer aSynUser; sal_Unicode const * p1 = pUserInfoBegin; while (p1 < pUserInfoEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, eMechanism, eCharset, eEscapeType); if (eEscapeType == EscapeType::NONE) { if (nUTF32 == ':' && bSupportsPassword) { bHasAuth = true; break; } else if (nUTF32 == ';' && bSupportsAuth && pUserInfoEnd - p1 > RTL_CONSTASCII_LENGTH("auth=") && INetMIME::equalIgnoreCase( p1, p1 + RTL_CONSTASCII_LENGTH("auth="), "auth=")) { p1 += RTL_CONSTASCII_LENGTH("auth="); bHasAuth = true; break; } } appendUCS4(aSynUser, nUTF32, eEscapeType, ePart, eCharset, false); } m_aUser.set(m_aAbsURIRef, aSynUser, m_aAbsURIRef.getLength()); if (bHasAuth) { if (bSupportsPassword) { m_aAbsURIRef.append(':'); OUStringBuffer aSynAuth; while (p1 < pUserInfoEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAuth, nUTF32, eEscapeType, ePart, eCharset, false); } m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength()); } else { m_aAbsURIRef.append(";AUTH="); OUStringBuffer aSynAuth; while (p1 < pUserInfoEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, eMechanism, eCharset, eEscapeType); if (!INetMIME::isIMAPAtomChar(nUTF32)) { setInvalid(); return false; } appendUCS4(aSynAuth, nUTF32, eEscapeType, ePart, eCharset, false); } m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength()); } } if (pHostPortBegin) m_aAbsURIRef.append('@'); } if (pHostPortBegin) { sal_Unicode const * pPort = pHostPortEnd; if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd ) { sal_Unicode const * p1 = pHostPortEnd - 1; while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1)) --p1; if (*p1 == ':') pPort = p1; } bool bNetBiosName = false; switch (m_eScheme) { case INetProtocol::File: // If the host equals "LOCALHOST" (unencoded and ignoring // case), turn it into an empty host: if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort, "localhost")) pHostPortBegin = pPort; bNetBiosName = true; break; case INetProtocol::Ldap: case INetProtocol::Smb: if (pHostPortBegin == pPort && pPort != pHostPortEnd) { setInvalid(); return false; } break; default: if (pHostPortBegin == pPort) { setInvalid(); return false; } break; } sal_Int32 nLenBeforeHost = m_aAbsURIRef.getLength(); if (!parseHostOrNetBiosName( pHostPortBegin, pPort, eMechanism, eCharset, bNetBiosName, &m_aAbsURIRef)) { setInvalid(); return false; } m_aHost = SubString(nLenBeforeHost, m_aAbsURIRef.getLength() - nLenBeforeHost); if (pPort != pHostPortEnd) { m_aAbsURIRef.append(':'); m_aPort.set(m_aAbsURIRef, std::u16string_view{pPort + 1, static_cast(pHostPortEnd - (pPort + 1))}, m_aAbsURIRef.getLength()); } } } // Parse sal_Int32 nBeforePathLength = m_aAbsURIRef.getLength(); if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset, bSkippedInitialSlash, nSegmentDelimiter, nAltSegmentDelimiter, getSchemeInfo().m_bQuery ? '?' : 0x80000000, nFragmentDelimiter, m_aAbsURIRef)) { setInvalid(); return false; } m_aPath = SubString(nBeforePathLength, m_aAbsURIRef.getLength() - nBeforePathLength); // Parse ? if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?') { m_aAbsURIRef.append('?'); OUStringBuffer aSynQuery; for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynQuery, nUTF32, eEscapeType, PART_URIC, eCharset, true); } m_aQuery.set(m_aAbsURIRef, aSynQuery, m_aAbsURIRef.getLength()); } // Parse # if (pPos < pEnd && *pPos == nFragmentDelimiter) { m_aAbsURIRef.append(sal_Unicode(nFragmentDelimiter)); OUStringBuffer aSynFragment; for (++pPos; pPos < pEnd;) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC, eCharset, true); } m_aFragment.set(m_aAbsURIRef, aSynFragment, m_aAbsURIRef.getLength()); } if (pPos != pEnd) { setInvalid(); return false; } return true; } void INetURLObject::changeScheme(INetProtocol eTargetScheme) { sal_Int32 oldSchemeLen = 0; const OUString& rOldSchemeName = getSchemeInfo().m_sScheme; if (m_eScheme == INetProtocol::Generic) oldSchemeLen = m_aScheme.getLength(); else oldSchemeLen = rOldSchemeName.getLength(); m_eScheme=eTargetScheme; const OUString& rNewSchemeName = getSchemeInfo().m_sScheme; sal_Int32 newSchemeLen = rNewSchemeName.getLength(); m_aAbsURIRef.remove(0, oldSchemeLen); m_aAbsURIRef.insert(0, rNewSchemeName); sal_Int32 delta=newSchemeLen-oldSchemeLen; m_aUser+=delta; m_aAuth+=delta; m_aHost+=delta; m_aPort+=delta; m_aPath+=delta; m_aQuery+=delta; m_aFragment+=delta; } bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef, INetURLObject & rTheAbsURIRef, bool & rWasAbsolute, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs, FSysStyle eStyle) const { sal_Unicode const * p = rTheRelURIRef.getStr(); sal_Unicode const * pEnd = p + rTheRelURIRef.getLength(); sal_Unicode const * pPrefixBegin = p; PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd); bool hasScheme = pPrefix != nullptr; if (!hasScheme) { pPrefixBegin = p; hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty(); } sal_uInt32 nSegmentDelimiter = '/'; sal_uInt32 nQueryDelimiter = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000; sal_uInt32 nFragmentDelimiter = '#'; Part ePart = PART_VISIBLE; if (!hasScheme && bSmart) { // If the input matches any of the following productions (for which // the appropriate style bit is set in eStyle), it is assumed to be an // absolute file system path, rather than a relative URI reference. // (This is only a subset of the productions used for scheme detection // in INetURLObject::setAbsURIRef(), because most of those productions // interfere with the syntax of relative URI references.) The // productions use the auxiliary rules // domain = label *("." label) // label = alphanum [*(alphanum / "-") alphanum] // alphanum = ALPHA / DIGIT // UCS4 = // 1st Production (UNC file; FSysStyle::Dos only): // "\\" domain ["\" *UCS4] // 2nd Production (Unix-like DOS file; FSysStyle::Dos only): // ALPHA ":" ["/" *UCS4] // 3rd Production (DOS file; FSysStyle::Dos only): // ALPHA ":" ["\" *UCS4] if (eStyle & FSysStyle::Dos) { bool bFSys = false; sal_Unicode const * q = p; if (pEnd - q >= 2 && rtl::isAsciiAlpha(q[0]) && q[1] == ':' && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\')) bFSys = true; // 2nd, 3rd else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\') { q += 2; sal_Int32 n = rtl_ustr_indexOfChar_WithLength( q, pEnd - q, '\\'); sal_Unicode const * qe = n == -1 ? pEnd : q + n; if (parseHostOrNetBiosName( q, qe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW, true, nullptr)) { bFSys = true; // 1st } } if (bFSys) { INetURLObject aNewURI; aNewURI.setAbsURIRef(rTheRelURIRef, eMechanism, eCharset, true, eStyle); if (!aNewURI.HasError()) { rTheAbsURIRef = aNewURI; rWasAbsolute = true; return true; } } } // When the base URL is a file URL, accept relative file system paths // using "\" or ":" as delimiter (and ignoring URI conventions for "%" // and "#"), as well as relative URIs using "/" as delimiter: if (m_eScheme == INetProtocol::File) switch (guessFSysStyleByCounting(p, pEnd, eStyle)) { case FSysStyle::Unix: nSegmentDelimiter = '/'; break; case FSysStyle::Dos: nSegmentDelimiter = '\\'; bRelativeNonURIs = true; break; default: OSL_FAIL("INetURLObject::convertRelToAbs():" " Bad guessFSysStyleByCounting"); break; } if (bRelativeNonURIs) { eMechanism = EncodeMechanism::All; nQueryDelimiter = 0x80000000; nFragmentDelimiter = 0x80000000; ePart = PART_VISIBLE_NONSPECIAL; } } // If the relative URI has the same scheme as the base URI, and that // scheme is hierarchical, then ignore its presence in the relative // URI in order to be backward compatible (cf. RFC 2396 section 5.2 // step 3): if (pPrefix && pPrefix->m_eScheme == m_eScheme && getSchemeInfo().m_bHierarchical) { hasScheme = false; while (p != pEnd && *p++ != ':') ; } rWasAbsolute = hasScheme; // Fast solution for non-relative URIs: if (hasScheme) { INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset); if (aNewURI.HasError()) { rWasAbsolute = false; return false; } if (bIgnoreFragment) aNewURI.clearFragment(); rTheAbsURIRef = aNewURI; return true; } enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT, STATE_DONE }; OUStringBuffer aSynAbsURIRef(128); // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme // is empty ("") in that case, so take the scheme from m_aAbsURIRef if (m_eScheme != INetProtocol::Generic) { aSynAbsURIRef.append(getSchemeInfo().m_sScheme.asView()); } else { sal_Unicode const * pSchemeBegin = m_aAbsURIRef.getStr(); sal_Unicode const * pSchemeEnd = pSchemeBegin; while (pSchemeEnd[0] != ':') { ++pSchemeEnd; } aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin); } aSynAbsURIRef.append(':'); State eState = STATE_AUTH; bool bSameDoc = true; if (getSchemeInfo().m_bAuthority) { if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/') { aSynAbsURIRef.append("//"); p += 2; eState = STATE_ABS_PATH; bSameDoc = false; while (p != pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType); if (eEscapeType == EscapeType::NONE) { if (nUTF32 == nSegmentDelimiter) break; else if (nUTF32 == nFragmentDelimiter) { eState = STATE_FRAGMENT; break; } } appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, PART_VISIBLE, eCharset, true); } } else { SubString aAuthority(getAuthority()); aSynAbsURIRef.append(m_aAbsURIRef.getStr() + aAuthority.getBegin(), aAuthority.getLength()); } } if (eState == STATE_AUTH) { if (p == pEnd) eState = STATE_DONE; else if (*p == nFragmentDelimiter) { ++p; eState = STATE_FRAGMENT; } else if (*p == nSegmentDelimiter) { ++p; eState = STATE_ABS_PATH; bSameDoc = false; } else { eState = STATE_REL_PATH; bSameDoc = false; } } if (eState == STATE_ABS_PATH) { aSynAbsURIRef.append('/'); eState = STATE_DONE; while (p != pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType); if (eEscapeType == EscapeType::NONE) { if (nUTF32 == nFragmentDelimiter) { eState = STATE_FRAGMENT; break; } else if (nUTF32 == nSegmentDelimiter) nUTF32 = '/'; } appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart, eCharset, true); } } else if (eState == STATE_REL_PATH) { if (!getSchemeInfo().m_bHierarchical) { // Detect cases where a relative input could not be made absolute // because the given base URL is broken (most probably because it is // empty): SAL_WARN_IF( HasError(), "tools.urlobj", "cannot make <" << rTheRelURIRef << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE) << ">"); rWasAbsolute = false; return false; } sal_Unicode const * pBasePathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength(); while (pBasePathEnd != pBasePathBegin) if (*(--pBasePathEnd) == '/') { ++pBasePathEnd; break; } sal_Int32 nPathBegin = aSynAbsURIRef.getLength(); aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin); DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/', "INetURLObject::convertRelToAbs(): Bad base path"); while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter) { if (*p == '.') { if (pEnd - p == 1 || p[1] == nSegmentDelimiter || p[1] == nQueryDelimiter || p[1] == nFragmentDelimiter) { ++p; if (p != pEnd && *p == nSegmentDelimiter) ++p; continue; } else if (pEnd - p >= 2 && p[1] == '.' && (pEnd - p == 2 || p[2] == nSegmentDelimiter || p[2] == nQueryDelimiter || p[2] == nFragmentDelimiter) && aSynAbsURIRef.getLength() - nPathBegin > 1) { p += 2; if (p != pEnd && *p == nSegmentDelimiter) ++p; sal_Int32 i = aSynAbsURIRef.getLength() - 2; while (i > nPathBegin && aSynAbsURIRef[i] != '/') --i; aSynAbsURIRef.setLength(i + 1); DBG_ASSERT( aSynAbsURIRef.getLength() > nPathBegin && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/', "INetURLObject::convertRelToAbs(): Bad base path"); continue; } } while (p != pEnd && *p != nSegmentDelimiter && *p != nQueryDelimiter && *p != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart, eCharset, true); } if (p != pEnd && *p == nSegmentDelimiter) { aSynAbsURIRef.append('/'); ++p; } } while (p != pEnd && *p != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart, eCharset, true); } if (p == pEnd) eState = STATE_DONE; else { ++p; eState = STATE_FRAGMENT; } } else if (bSameDoc) { aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(), m_aPath.getLength()); if (m_aQuery.isPresent()) aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aQuery.getBegin() - 1, m_aQuery.getLength() + 1); } if (eState == STATE_FRAGMENT && !bIgnoreFragment) { aSynAbsURIRef.append('#'); while (p != pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, PART_VISIBLE, eCharset, true); } } INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear()); if (aNewURI.HasError()) { // Detect cases where a relative input could not be made absolute // because the given base URL is broken (most probably because it is // empty): SAL_WARN_IF( HasError(), "tools.urlobj", "cannot make <" << rTheRelURIRef << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE) << ">"); rWasAbsolute = false; return false; } rTheAbsURIRef = aNewURI; return true; } bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef, OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset, FSysStyle eStyle) const { // Check for hierarchical base URL: if (!getSchemeInfo().m_bHierarchical) { rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset); return false; } // Convert the input (absolute or relative URI ref) to an absolute URI // ref: INetURLObject aSubject; bool bWasAbsolute; if (!convertRelToAbs(rTheAbsURIRef, aSubject, bWasAbsolute, eEncodeMechanism, eCharset, false, false, false, eStyle)) { rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset); return false; } // Check for differing scheme or authority parts: if ((m_aScheme.compare( aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef) != 0) || (m_aUser.compare( aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef) != 0) || (m_aAuth.compare( aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef) != 0) || (m_aHost.compare( aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef) != 0) || (m_aPort.compare( aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef) != 0)) { rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset); return false; } sal_Unicode const * pBasePathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength(); sal_Unicode const * pSubjectPathBegin = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin(); sal_Unicode const * pSubjectPathEnd = pSubjectPathBegin + aSubject.m_aPath.getLength(); // Make nMatch point past the last matching slash, or past the end of the // paths, in case they are equal: sal_Unicode const * pSlash = nullptr; sal_Unicode const * p1 = pBasePathBegin; sal_Unicode const * p2 = pSubjectPathBegin; for (;;) { if (p1 == pBasePathEnd || p2 == pSubjectPathEnd) { if (p1 == pBasePathEnd && p2 == pSubjectPathEnd) pSlash = p1; break; } sal_Unicode c = *p1++; if (c != *p2++) break; if (c == '/') pSlash = p1; } if (!pSlash) { // One of the paths does not start with '/': rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset); return false; } sal_Int32 nMatch = pSlash - pBasePathBegin; // If the two URLs are DOS file URLs starting with different volumes // (e.g., file:///a:/... and file:///b:/...), the subject is not made // relative (it could be, but some people do not like that): if (m_eScheme == INetProtocol::File && nMatch <= 1 && hasDosVolume(eStyle) && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these? { rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset); return false; } // For every slash in the base path after nMatch, a prefix of "../" is // added to the new relative URL (if the common prefix of the two paths is // only "/"---but see handling of file URLs above---, the complete subject // path could go into the new relative URL instead, but some people don't // like that): OUStringBuffer aSynRelURIRef; for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd; ++p) { if (*p == '/') aSynRelURIRef.append("../"); } // If the new relative URL would start with "//" (i.e., it would be // mistaken for a relative URL starting with an authority part), or if the // new relative URL would neither be empty nor start with <"/"> nor start // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting // with a scheme part), then the new relative URL is prefixed with "./": if (aSynRelURIRef.isEmpty()) { if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2 && pSubjectPathBegin[nMatch] == '/' && pSubjectPathBegin[nMatch + 1] == '/') { aSynRelURIRef.append("./"); } else { for (sal_Unicode const * p = pSubjectPathBegin + nMatch; p != pSubjectPathEnd && *p != '/'; ++p) { if (mustEncode(*p, PART_REL_SEGMENT_EXTRA)) { aSynRelURIRef.append("./"); break; } } } } // The remainder of the subject path, starting at nMatch, is appended to // the new relative URL: aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd, eDecodeMechanism, eCharset)); // If the subject has defined query or fragment parts, they are appended // to the new relative URL: if (aSubject.m_aQuery.isPresent()) { aSynRelURIRef.append('?'); aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery, eDecodeMechanism, eCharset)); } if (aSubject.m_aFragment.isPresent()) { aSynRelURIRef.append('#'); aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment, eDecodeMechanism, eCharset)); } rTheRelURIRef = aSynRelURIRef.makeStringAndClear(); return true; } // static bool INetURLObject::convertIntToExt(std::u16string_view rTheIntURIRef, OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset) { OUStringBuffer aSynExtURIRef(256); encodeText(aSynExtURIRef, rTheIntURIRef, PART_VISIBLE, EncodeMechanism::NotCanonical, eCharset, true); sal_Unicode const * pBegin = aSynExtURIRef.getStr(); sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength(); sal_Unicode const * p = pBegin; PrefixInfo const * pPrefix = getPrefix(p, pEnd); bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::Internal; if (bConvert) { comphelper::string::replaceAt(aSynExtURIRef, 0, p - pBegin, OUString::createFromAscii(pPrefix->m_pTranslatedPrefix)); } rTheExtURIRef = decode(aSynExtURIRef, eDecodeMechanism, eCharset); return bConvert; } // static bool INetURLObject::convertExtToInt(std::u16string_view rTheExtURIRef, OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset) { OUStringBuffer aSynIntURIRef(256); encodeText(aSynIntURIRef, rTheExtURIRef, PART_VISIBLE, EncodeMechanism::NotCanonical, eCharset, true); sal_Unicode const * pBegin = aSynIntURIRef.getStr(); sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength(); sal_Unicode const * p = pBegin; PrefixInfo const * pPrefix = getPrefix(p, pEnd); bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::External; if (bConvert) { comphelper::string::replaceAt(aSynIntURIRef, 0, p - pBegin, OUString::createFromAscii(pPrefix->m_pTranslatedPrefix)); } rTheIntURIRef = decode(aSynIntURIRef, eDecodeMechanism, eCharset); return bConvert; } // static INetURLObject::PrefixInfo const * INetURLObject::getPrefix(sal_Unicode const *& rBegin, sal_Unicode const * pEnd) { static PrefixInfo const aMap[] = { // dummy entry at front needed, because pLast may point here: { nullptr, nullptr, INetProtocol::NotValid, PrefixInfo::Kind::Internal }, { ".component:", "staroffice.component:", INetProtocol::Component, PrefixInfo::Kind::Internal }, { ".uno:", "staroffice.uno:", INetProtocol::Uno, PrefixInfo::Kind::Internal }, { "cid:", nullptr, INetProtocol::Cid, PrefixInfo::Kind::Official }, { "data:", nullptr, INetProtocol::Data, PrefixInfo::Kind::Official }, { "db:", "staroffice.db:", INetProtocol::Db, PrefixInfo::Kind::Internal }, { "file:", nullptr, INetProtocol::File, PrefixInfo::Kind::Official }, { "ftp:", nullptr, INetProtocol::Ftp, PrefixInfo::Kind::Official }, { "hid:", "staroffice.hid:", INetProtocol::Hid, PrefixInfo::Kind::Internal }, { "http:", nullptr, INetProtocol::Http, PrefixInfo::Kind::Official }, { "https:", nullptr, INetProtocol::Https, PrefixInfo::Kind::Official }, { "javascript:", nullptr, INetProtocol::Javascript, PrefixInfo::Kind::Official }, { "ldap:", nullptr, INetProtocol::Ldap, PrefixInfo::Kind::Official }, { "macro:", "staroffice.macro:", INetProtocol::Macro, PrefixInfo::Kind::Internal }, { "mailto:", nullptr, INetProtocol::Mailto, PrefixInfo::Kind::Official }, { "private:", "staroffice.private:", INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal }, { "private:factory/", "staroffice.factory:", INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal }, { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal }, { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal }, { "private:searchfolder:", "staroffice.searchfolder:", INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal }, { "private:trashcan:", "staroffice.trashcan:", INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal }, { "sftp:", nullptr, INetProtocol::Sftp, PrefixInfo::Kind::Official }, { "slot:", "staroffice.slot:", INetProtocol::Slot, PrefixInfo::Kind::Internal }, { "smb:", nullptr, INetProtocol::Smb, PrefixInfo::Kind::Official }, { "staroffice.component:", ".component:", INetProtocol::Component, PrefixInfo::Kind::External }, { "staroffice.db:", "db:", INetProtocol::Db, PrefixInfo::Kind::External }, { "staroffice.factory:", "private:factory/", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "staroffice.hid:", "hid:", INetProtocol::Hid, PrefixInfo::Kind::External }, { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "staroffice.macro:", "macro:", INetProtocol::Macro, PrefixInfo::Kind::External }, { "staroffice.private:", "private:", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "staroffice.searchfolder:", "private:searchfolder:", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "staroffice.slot:", "slot:", INetProtocol::Slot, PrefixInfo::Kind::External }, { "staroffice.trashcan:", "private:trashcan:", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "staroffice.uno:", ".uno:", INetProtocol::Uno, PrefixInfo::Kind::External }, { "staroffice:", "private:", INetProtocol::PrivSoffice, PrefixInfo::Kind::External }, { "telnet:", nullptr, INetProtocol::Telnet, PrefixInfo::Kind::Official }, { "vnd.libreoffice.cmis:", nullptr, INetProtocol::Cmis, PrefixInfo::Kind::Internal }, { "vnd.sun.star.cmd:", nullptr, INetProtocol::VndSunStarCmd, PrefixInfo::Kind::Official }, { "vnd.sun.star.expand:", nullptr, INetProtocol::VndSunStarExpand, PrefixInfo::Kind::Official }, { "vnd.sun.star.help:", nullptr, INetProtocol::VndSunStarHelp, PrefixInfo::Kind::Official }, { "vnd.sun.star.hier:", nullptr, INetProtocol::VndSunStarHier, PrefixInfo::Kind::Official }, { "vnd.sun.star.pkg:", nullptr, INetProtocol::VndSunStarPkg, PrefixInfo::Kind::Official }, { "vnd.sun.star.tdoc:", nullptr, INetProtocol::VndSunStarTdoc, PrefixInfo::Kind::Official }, { "vnd.sun.star.webdav:", nullptr, INetProtocol::VndSunStarWebdav, PrefixInfo::Kind::Official } }; /* This list needs to be sorted, or you'll introduce serious bugs */ PrefixInfo const * pFirst = aMap + 1; PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1; PrefixInfo const * pMatch = nullptr; sal_Unicode const * pMatched = rBegin; sal_Unicode const * p = rBegin; sal_Int32 i = 0; for (; pFirst < pLast; ++i) { if (pFirst->m_pPrefix[i] == '\0') { pMatch = pFirst++; pMatched = p; } if (p >= pEnd) break; sal_uInt32 nChar = rtl::toAsciiLowerCase(*p++); while (pFirst <= pLast && static_cast(pFirst->m_pPrefix[i]) < nChar) ++pFirst; while (pFirst <= pLast && static_cast(pLast->m_pPrefix[i]) > nChar) --pLast; } if (pFirst == pLast) { char const * q = pFirst->m_pPrefix + i; while (p < pEnd && *q != '\0' && rtl::toAsciiLowerCase(*p) == static_cast(*q)) { ++p; ++q; } if (*q == '\0') { rBegin = p; return pFirst; } } rBegin = pMatched; return pMatch; } sal_Int32 INetURLObject::getAuthorityBegin() const { DBG_ASSERT(getSchemeInfo().m_bAuthority, "INetURLObject::getAuthority(): Bad scheme"); sal_Int32 nBegin; if (m_aUser.isPresent()) nBegin = m_aUser.getBegin(); else if (m_aHost.isPresent()) nBegin = m_aHost.getBegin(); else nBegin = m_aPath.getBegin(); nBegin -= RTL_CONSTASCII_LENGTH("//"); DBG_ASSERT(m_aAbsURIRef[nBegin] == '/' && m_aAbsURIRef[nBegin + 1] == '/', "INetURLObject::getAuthority(): Bad authority"); return nBegin; } INetURLObject::SubString INetURLObject::getAuthority() const { sal_Int32 nBegin = getAuthorityBegin(); sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() : m_aHost.isPresent() ? m_aHost.getEnd() : m_aAuth.isPresent() ? m_aAuth.getEnd() : m_aUser.isPresent() ? m_aUser.getEnd() : nBegin + RTL_CONSTASCII_LENGTH("//"); return SubString(nBegin, nEnd - nBegin); } bool INetURLObject::setUser(std::u16string_view rTheUser, rtl_TextEncoding eCharset) { if ( !getSchemeInfo().m_bUser ) { return false; } OUStringBuffer aNewUser; encodeText(aNewUser, rTheUser, PART_USER_PASSWORD, EncodeMechanism::WasEncoded, eCharset, false); sal_Int32 nDelta; if (m_aUser.isPresent()) nDelta = m_aUser.set(m_aAbsURIRef, aNewUser); else if (m_aHost.isPresent()) { m_aAbsURIRef.insert(m_aHost.getBegin(), u'@'); nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1; } else if (getSchemeInfo().m_bHost) return false; else nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin()); m_aAuth += nDelta; m_aHost += nDelta; m_aPort += nDelta; m_aPath += nDelta; m_aQuery += nDelta; m_aFragment += nDelta; return true; } namespace { void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count) { OUString sTemp(rBuf.makeStringAndClear()); rBuf.append(sTemp.replaceAt(index, count, u"")); } } bool INetURLObject::clearPassword() { if (!getSchemeInfo().m_bPassword) return false; if (m_aAuth.isPresent()) { lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1, m_aAuth.getLength() + 1); sal_Int32 nDelta = m_aAuth.clear() - 1; m_aHost += nDelta; m_aPort += nDelta; m_aPath += nDelta; m_aQuery += nDelta; m_aFragment += nDelta; } return true; } bool INetURLObject::setPassword(std::u16string_view rThePassword, rtl_TextEncoding eCharset) { if (!getSchemeInfo().m_bPassword) return false; OUStringBuffer aNewAuth; encodeText(aNewAuth, rThePassword, PART_USER_PASSWORD, EncodeMechanism::WasEncoded, eCharset, false); sal_Int32 nDelta; if (m_aAuth.isPresent()) nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth); else if (m_aUser.isPresent()) { m_aAbsURIRef.insert(m_aUser.getEnd(), u':'); nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1; } else if (m_aHost.isPresent()) { m_aAbsURIRef.insert(m_aHost.getBegin(), ":@" ); m_aUser.set(m_aAbsURIRef, std::u16string_view{}, m_aHost.getBegin()); nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2; } else if (getSchemeInfo().m_bHost) return false; else { m_aAbsURIRef.insert(m_aPath.getBegin(), u':'); m_aUser.set(m_aAbsURIRef, std::u16string_view{}, m_aPath.getBegin()); nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1; } m_aHost += nDelta; m_aPort += nDelta; m_aPath += nDelta; m_aQuery += nDelta; m_aFragment += nDelta; return true; } // static bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd, OUStringBuffer* pCanonic) { // RFC 2373 is inconsistent about how to write an IPv6 address in which an // IPv4 address directly follows the abbreviating "::". The ABNF in // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly // mentions "::13:1.68.3". This algorithm accepts both variants: enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN, STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN, STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6, STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON, STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON, STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2, STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4, STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE }; sal_uInt32 nNumber = 0; int nDigits = 0; int nOctets = 0; State eState = STATE_INITIAL; sal_Unicode const * p = rBegin; sal_Int32 nOriginalCanonicLength = pCanonic ? pCanonic->getLength() : 0; for (; p != pEnd; ++p) switch (eState) { case STATE_INITIAL: if (*p == '[') { if (pCanonic) pCanonic->append('['); eState = STATE_IP6; } else if (rtl::isAsciiAlpha(*p) || *p == '_') eState = STATE_TOPLABEL; else if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; nOctets = 1; eState = STATE_IP4; } else goto done; break; case STATE_LABEL: if (*p == '.') eState = STATE_LABEL_DOT; else if (*p == '-') eState = STATE_LABEL_HYPHEN; else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_') goto done; break; case STATE_LABEL_HYPHEN: if (rtl::isAsciiAlphanumeric(*p) || *p == '_') eState = STATE_LABEL; else if (*p != '-') goto done; break; case STATE_LABEL_DOT: if (rtl::isAsciiAlpha(*p) || *p == '_') eState = STATE_TOPLABEL; else if (rtl::isAsciiDigit(*p)) eState = STATE_LABEL; else goto done; break; case STATE_TOPLABEL: if (*p == '.') eState = STATE_TOPLABEL_DOT; else if (*p == '-') eState = STATE_TOPLABEL_HYPHEN; else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_') goto done; break; case STATE_TOPLABEL_HYPHEN: if (rtl::isAsciiAlphanumeric(*p) || *p == '_') eState = STATE_TOPLABEL; else if (*p != '-') goto done; break; case STATE_TOPLABEL_DOT: if (rtl::isAsciiAlpha(*p) || *p == '_') eState = STATE_TOPLABEL; else if (rtl::isAsciiDigit(*p)) eState = STATE_LABEL; else goto done; break; case STATE_IP4: if (*p == '.') if (nOctets < 4) { if (pCanonic) { pCanonic->append(static_cast(nNumber)); pCanonic->append( '.' ); } ++nOctets; eState = STATE_IP4_DOT; } else eState = STATE_LABEL_DOT; else if (*p == '-') eState = STATE_LABEL_HYPHEN; else if (rtl::isAsciiAlpha(*p) || *p == '_') eState = STATE_LABEL; else if (rtl::isAsciiDigit(*p)) if (nDigits < 3) { nNumber = 10 * nNumber + INetMIME::getWeight(*p); ++nDigits; } else eState = STATE_LABEL; else goto done; break; case STATE_IP4_DOT: if (rtl::isAsciiAlpha(*p) || *p == '_') eState = STATE_TOPLABEL; else if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; eState = STATE_IP4; } else goto done; break; case STATE_IP6: if (*p == ':') eState = STATE_IP6_COLON; else if (rtl::isAsciiHexDigit(*p)) { nNumber = INetMIME::getHexWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ1; } else goto done; break; case STATE_IP6_COLON: if (*p == ':') { if (pCanonic) pCanonic->append("::"); eState = STATE_IP6_2COLON; } else goto done; break; case STATE_IP6_2COLON: if (*p == ']') eState = STATE_IP6_DONE; else if (*p == ':') { if (pCanonic) pCanonic->append(':'); eState = STATE_IP6_3COLON; } else if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ2_MAYBE_IP4; } else if (rtl::isAsciiHexDigit(*p)) { nNumber = INetMIME::getHexWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ2; } else goto done; break; case STATE_IP6_3COLON: if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; nOctets = 1; eState = STATE_IP6_IP4; } else goto done; break; case STATE_IP6_HEXSEQ1: if (*p == ']') { if (pCanonic) pCanonic->append( OUString::number(nNumber, 16)); eState = STATE_IP6_DONE; } else if (*p == ':') { if (pCanonic) { pCanonic->append( OUString::number(nNumber, 16)); pCanonic->append(':'); } eState = STATE_IP6_HEXSEQ1_COLON; } else if (rtl::isAsciiHexDigit(*p) && nDigits < 4) { nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); ++nDigits; } else goto done; break; case STATE_IP6_HEXSEQ1_COLON: if (*p == ':') { if (pCanonic) pCanonic->append(':'); eState = STATE_IP6_2COLON; } else if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ1_MAYBE_IP4; } else if (rtl::isAsciiHexDigit(*p)) { nNumber = INetMIME::getHexWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ1; } else goto done; break; case STATE_IP6_HEXSEQ1_MAYBE_IP4: if (*p == ']') { if (pCanonic) pCanonic->append( OUString::number(nNumber, 16)); eState = STATE_IP6_DONE; } else if (*p == ':') { if (pCanonic) { pCanonic->append( OUString::number(nNumber, 16)); pCanonic->append(':'); } eState = STATE_IP6_HEXSEQ1_COLON; } else if (*p == '.') { nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15) + (nNumber & 15); if (pCanonic) { pCanonic->append( OUString::number(nNumber)); pCanonic->append('.'); } nOctets = 2; eState = STATE_IP6_IP4_DOT; } else if (rtl::isAsciiDigit(*p) && nDigits < 3) { nNumber = 16 * nNumber + INetMIME::getWeight(*p); ++nDigits; } else if (rtl::isAsciiHexDigit(*p) && nDigits < 4) { nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); ++nDigits; eState = STATE_IP6_HEXSEQ1; } else goto done; break; case STATE_IP6_HEXSEQ2: if (*p == ']') { if (pCanonic) pCanonic->append( OUString::number(nNumber, 16)); eState = STATE_IP6_DONE; } else if (*p == ':') { if (pCanonic) { pCanonic->append( OUString::number(nNumber, 16)); pCanonic->append(':'); } eState = STATE_IP6_HEXSEQ2_COLON; } else if (rtl::isAsciiHexDigit(*p) && nDigits < 4) { nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); ++nDigits; } else goto done; break; case STATE_IP6_HEXSEQ2_COLON: if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ2_MAYBE_IP4; } else if (rtl::isAsciiHexDigit(*p)) { nNumber = INetMIME::getHexWeight(*p); nDigits = 1; eState = STATE_IP6_HEXSEQ2; } else goto done; break; case STATE_IP6_HEXSEQ2_MAYBE_IP4: if (*p == ']') { if (pCanonic) pCanonic->append( OUString::number(nNumber, 16)); eState = STATE_IP6_DONE; } else if (*p == ':') { if (pCanonic) { pCanonic->append( OUString::number(nNumber, 16)); pCanonic->append(':'); } eState = STATE_IP6_HEXSEQ2_COLON; } else if (*p == '.') { nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15) + (nNumber & 15); if (pCanonic) { pCanonic->append( OUString::number(nNumber)); pCanonic->append('.'); } nOctets = 2; eState = STATE_IP6_IP4_DOT; } else if (rtl::isAsciiDigit(*p) && nDigits < 3) { nNumber = 16 * nNumber + INetMIME::getWeight(*p); ++nDigits; } else if (rtl::isAsciiHexDigit(*p) && nDigits < 4) { nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); ++nDigits; eState = STATE_IP6_HEXSEQ2; } else goto done; break; case STATE_IP6_IP4: if (*p == ']') if (nOctets == 4) { if (pCanonic) pCanonic->append( OUString::number(nNumber)); eState = STATE_IP6_DONE; } else goto done; else if (*p == '.') if (nOctets < 4) { if (pCanonic) { pCanonic->append( OUString::number(nNumber)); pCanonic->append('.'); } ++nOctets; eState = STATE_IP6_IP4_DOT; } else goto done; else if (rtl::isAsciiDigit(*p) && nDigits < 3) { nNumber = 10 * nNumber + INetMIME::getWeight(*p); ++nDigits; } else goto done; break; case STATE_IP6_IP4_DOT: if (rtl::isAsciiDigit(*p)) { nNumber = INetMIME::getWeight(*p); nDigits = 1; eState = STATE_IP6_IP4; } else goto done; break; case STATE_IP6_DONE: goto done; } done: switch (eState) { case STATE_LABEL: case STATE_TOPLABEL: case STATE_TOPLABEL_DOT: if (pCanonic) { pCanonic->setLength(nOriginalCanonicLength); pCanonic->append(rBegin, p - rBegin); } rBegin = p; return true; case STATE_IP4: if (nOctets == 4) { if (pCanonic) pCanonic->append( OUString::number(nNumber)); rBegin = p; return true; } if (pCanonic) pCanonic->setLength(nOriginalCanonicLength); return false; case STATE_IP6_DONE: if (pCanonic) pCanonic->append(']'); rBegin = p; return true; default: if (pCanonic) pCanonic->setLength(nOriginalCanonicLength); return false; } } // static bool INetURLObject::parseHostOrNetBiosName( sal_Unicode const * pBegin, sal_Unicode const * pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName, OUStringBuffer* pCanonic) { if (pBegin >= pEnd) return true; sal_Int32 nOriginalCanonicLength = pCanonic ? pCanonic->getLength() : 0; if (sal_Unicode const* p = pBegin; parseHost(p, pEnd, pCanonic) && p == pEnd) return true; if (pCanonic) pCanonic->setLength(nOriginalCanonicLength); // discard parseHost results if (!bNetBiosName) return false; while (pBegin < pEnd) { EscapeType eEscapeType; switch (sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, eMechanism, eCharset, eEscapeType)) { default: if (INetMIME::isVisible(nUTF32)) { if (pCanonic) appendUCS4(*pCanonic, nUTF32, eEscapeType, PART_URIC, eCharset, true); break; } [[fallthrough]]; case '"': case '*': case '+': case ',': case '/': case ':': case ';': case '<': case '=': case '>': case '?': case '[': case '\\': case ']': case '`': case '|': if (pCanonic) pCanonic->setLength(nOriginalCanonicLength); return false; } } return true; } bool INetURLObject::setHost(std::u16string_view rTheHost, rtl_TextEncoding eCharset) { if (!getSchemeInfo().m_bHost) return false; OUStringBuffer aSynHost(rTheHost); bool bNetBiosName = false; switch (m_eScheme) { case INetProtocol::File: { if (OUString::unacquired(aSynHost).equalsIgnoreAsciiCase("localhost")) { aSynHost.setLength(0); } bNetBiosName = true; } break; case INetProtocol::Ldap: if (aSynHost.isEmpty() && m_aPort.isPresent()) return false; break; default: if (aSynHost.isEmpty()) return false; break; } if (!parseHostOrNetBiosName( aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(), EncodeMechanism::WasEncoded, eCharset, bNetBiosName, &aSynHost)) return false; sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost); m_aPort += nDelta; m_aPath += nDelta; m_aQuery += nDelta; m_aFragment += nDelta; return true; } // static bool INetURLObject::parsePath(INetProtocol eScheme, sal_Unicode const ** pBegin, sal_Unicode const * pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSkippedInitialSlash, sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter, sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter, OUStringBuffer &rSynPath) { DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param"); sal_Unicode const * pPos = *pBegin; const sal_Int32 nSynPathBeforeLen = rSynPath.getLength(); switch (eScheme) { case INetProtocol::NotValid: return false; case INetProtocol::Ftp: if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter) goto failed; while (pPos < pEnd && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_HTTP_PATH, eCharset, true); } if (rSynPath.getLength() - nSynPathBeforeLen == 0) rSynPath.append('/'); break; case INetProtocol::Http: case INetProtocol::VndSunStarWebdav: case INetProtocol::Https: case INetProtocol::Smb: case INetProtocol::Cmis: if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter) goto failed; while (pPos < pEnd && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_HTTP_PATH, eCharset, true); } if (rSynPath.getLength() - nSynPathBeforeLen == 0) rSynPath.append('/'); break; case INetProtocol::File: { if (bSkippedInitialSlash) rSynPath.append('/'); else if (pPos < pEnd && *pPos != nSegmentDelimiter && *pPos != nAltSegmentDelimiter) goto failed; while (pPos < pEnd && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); if (eEscapeType == EscapeType::NONE) { if (nUTF32 == nSegmentDelimiter || nUTF32 == nAltSegmentDelimiter) { rSynPath.append('/'); continue; } else if (nUTF32 == '|' && (pPos == pEnd || *pPos == nFragmentDelimiter || *pPos == nSegmentDelimiter || *pPos == nAltSegmentDelimiter) && rSynPath.getLength() - nSynPathBeforeLen == 2 && rtl::isAsciiAlpha(rSynPath[nSynPathBeforeLen + 1])) { // A first segment of is translated to // : rSynPath.append(':'); continue; } } appendUCS4(rSynPath, nUTF32, eEscapeType, PART_PCHAR, eCharset, true); } if (rSynPath.getLength() - nSynPathBeforeLen == 0) rSynPath.append('/'); break; } case INetProtocol::Mailto: while (pPos < pEnd && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_MAILTO, eCharset, true); } break; case INetProtocol::PrivSoffice: case INetProtocol::Slot: case INetProtocol::Hid: case INetProtocol::Macro: case INetProtocol::Uno: case INetProtocol::Component: case INetProtocol::Ldap: while (pPos < pEnd && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_PATH_BEFORE_QUERY, eCharset, true); } break; case INetProtocol::VndSunStarHelp: if (pPos == pEnd || *pPos == nQueryDelimiter || *pPos == nFragmentDelimiter) rSynPath.append('/'); else { if (*pPos != '/') goto failed; while (pPos < pEnd && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_HTTP_PATH, eCharset, true); } } break; case INetProtocol::Javascript: case INetProtocol::Data: case INetProtocol::Cid: case INetProtocol::Db: while (pPos < pEnd && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_URIC, eCharset, true); } break; case INetProtocol::VndSunStarHier: case INetProtocol::VndSunStarPkg: if (pPos < pEnd && *pPos != '/' && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) goto failed; while (pPos < pEnd && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); if (eEscapeType == EscapeType::NONE && nUTF32 == '/') rSynPath.append('/'); else appendUCS4(rSynPath, nUTF32, eEscapeType, PART_PCHAR, eCharset, false); } if (rSynPath.getLength() - nSynPathBeforeLen == 0) rSynPath.append('/'); break; case INetProtocol::VndSunStarCmd: case INetProtocol::VndSunStarExpand: { if (pPos == pEnd || *pPos == nFragmentDelimiter) goto failed; Part ePart = PART_URIC_NO_SLASH; while (pPos != pEnd && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, ePart, eCharset, true); ePart = PART_URIC; } break; } case INetProtocol::Telnet: if (pPos < pEnd) { if (*pPos != '/' || pEnd - pPos > 1) goto failed; ++pPos; } rSynPath.append('/'); break; case INetProtocol::VndSunStarTdoc: if (pPos == pEnd || *pPos != '/') goto failed; while (pPos < pEnd && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); if (eEscapeType == EscapeType::NONE && nUTF32 == '/') rSynPath.append('/'); else appendUCS4(rSynPath, nUTF32, eEscapeType, PART_PCHAR, eCharset, false); } break; case INetProtocol::Generic: case INetProtocol::Sftp: while (pPos < pEnd && *pPos != nFragmentDelimiter) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rSynPath, nUTF32, eEscapeType, PART_URIC, eCharset, true); } if (rSynPath.isEmpty()) goto failed; break; default: OSL_ASSERT(false); break; } *pBegin = pPos; return true; failed: rSynPath.setLength(nSynPathBeforeLen); return false; } bool INetURLObject::setPath(std::u16string_view rThePath, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { OUStringBuffer aSynPath(256); sal_Unicode const * p = rThePath.data(); sal_Unicode const * pEnd = p + rThePath.size(); if (!parsePath(m_eScheme, &p, pEnd, eMechanism, eCharset, false, '/', 0x80000000, 0x80000000, 0x80000000, aSynPath) || p != pEnd) return false; sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath); m_aQuery += nDelta; m_aFragment += nDelta; return true; } bool INetURLObject::checkHierarchical() const { if (m_eScheme == INetProtocol::VndSunStarExpand) { OSL_FAIL( "INetURLObject::checkHierarchical vnd.sun.star.expand"); return true; } else { return getSchemeInfo().m_bHierarchical; } } bool INetURLObject::Append(std::u16string_view rTheSegment, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { return insertName(rTheSegment, false, LAST_SEGMENT, eMechanism, eCharset); } INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash) const { DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT, "INetURLObject::getSegment(): Bad index"); if (!checkHierarchical()) return SubString(); sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); sal_Unicode const * pSegBegin; sal_Unicode const * pSegEnd; if (nIndex == LAST_SEGMENT) { pSegEnd = pPathEnd; if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/') --pSegEnd; if (pSegEnd <= pPathBegin) return SubString(); pSegBegin = pSegEnd - 1; while (pSegBegin > pPathBegin && *pSegBegin != '/') --pSegBegin; } else { pSegBegin = pPathBegin; while (nIndex-- > 0) do { ++pSegBegin; if (pSegBegin >= pPathEnd) return SubString(); } while (*pSegBegin != '/'); pSegEnd = pSegBegin + 1; while (pSegEnd < pPathEnd && *pSegEnd != '/') ++pSegEnd; } return SubString(pSegBegin - m_aAbsURIRef.getStr(), pSegEnd - pSegBegin); } bool INetURLObject::insertName(std::u16string_view rTheName, bool bAppendFinalSlash, sal_Int32 nIndex, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT, "INetURLObject::insertName(): Bad index"); if (!checkHierarchical()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); sal_Unicode const * pPrefixEnd; bool bInsertSlash; sal_Unicode const * pSuffixBegin; if (nIndex == LAST_SEGMENT) { pPrefixEnd = pPathEnd; if (pPrefixEnd > pPathBegin && pPrefixEnd[-1] == '/') { --pPrefixEnd; } bInsertSlash = bAppendFinalSlash; pSuffixBegin = pPathEnd; } else if (nIndex == 0) { pPrefixEnd = pPathBegin; bInsertSlash = (pPathBegin < pPathEnd && *pPathBegin != '/') || (pPathBegin == pPathEnd && bAppendFinalSlash); pSuffixBegin = (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' && !bAppendFinalSlash) ? pPathEnd : pPathBegin; } else { pPrefixEnd = pPathBegin; sal_Unicode const * pEnd = pPathEnd; if (pEnd > pPathBegin && pEnd[-1] == '/') --pEnd; bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/'; bInsertSlash = false; pSuffixBegin = pPathEnd; while (nIndex-- > 0) for (;;) { if (bSkip) ++pPrefixEnd; bSkip = true; if (pPrefixEnd >= pEnd) { if (nIndex == 0) { bInsertSlash = bAppendFinalSlash; break; } else return false; } if (*pPrefixEnd == '/') { pSuffixBegin = pPrefixEnd; break; } } } OUStringBuffer aNewPath(256); aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin); aNewPath.append('/'); encodeText(aNewPath, rTheName, PART_PCHAR, eMechanism, eCharset, true); if (bInsertSlash) { aNewPath.append('/'); } aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin); return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } void INetURLObject::clearQuery() { if (HasError()) return; if (m_aQuery.isPresent()) { lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1, m_aQuery.getLength() + 1); m_aFragment += m_aQuery.clear() - 1; } } bool INetURLObject::setQuery(std::u16string_view rTheQuery, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { if (!getSchemeInfo().m_bQuery) return false; OUStringBuffer aNewQuery; encodeText(aNewQuery, rTheQuery, PART_URIC, eMechanism, eCharset, true); sal_Int32 nDelta; if (m_aQuery.isPresent()) nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery); else { m_aAbsURIRef.insert(m_aPath.getEnd(), u'?'); nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1) + 1; } m_aFragment += nDelta; return true; } bool INetURLObject::clearFragment() { if (HasError()) return false; if (m_aFragment.isPresent()) { m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1); m_aFragment.clear(); } return true; } bool INetURLObject::setFragment(std::u16string_view rTheFragment, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { if (HasError()) return false; OUStringBuffer aNewFragment; encodeText(aNewFragment, rTheFragment, PART_URIC, eMechanism, eCharset, true); if (m_aFragment.isPresent()) m_aFragment.set(m_aAbsURIRef, aNewFragment); else { m_aAbsURIRef.append('#'); m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength()); } return true; } bool INetURLObject::hasDosVolume(FSysStyle eStyle) const { sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); return (eStyle & FSysStyle::Dos) && m_aPath.getLength() >= 3 && p[0] == '/' && rtl::isAsciiAlpha(p[1]) && p[2] == ':' && (m_aPath.getLength() == 3 || p[3] == '/'); } // static void INetURLObject::encodeText( OUStringBuffer& rOutputBuffer, sal_Unicode const * pBegin, sal_Unicode const * pEnd, Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes) { while (pBegin < pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, eMechanism, eCharset, eEscapeType); appendUCS4(rOutputBuffer, nUTF32, eEscapeType, ePart, eCharset, bKeepVisibleEscapes); } } // static OUString INetURLObject::decode(sal_Unicode const * pBegin, sal_Unicode const * pEnd, DecodeMechanism eMechanism, rtl_TextEncoding eCharset) { switch (eMechanism) { case DecodeMechanism::NONE: return OUString(pBegin, pEnd - pBegin); case DecodeMechanism::ToIUri: eCharset = RTL_TEXTENCODING_UTF8; break; default: break; } OUStringBuffer aResult(static_cast(pEnd-pBegin)); while (pBegin < pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, EncodeMechanism::WasEncoded, eCharset, eEscapeType); switch (eEscapeType) { case EscapeType::NONE: aResult.appendUtf32(nUTF32); break; case EscapeType::Octet: appendEscape(aResult, nUTF32); break; case EscapeType::Utf32: if ( rtl::isAscii(nUTF32) && ( eMechanism == DecodeMechanism::ToIUri || ( eMechanism == DecodeMechanism::Unambiguous && mustEncode(nUTF32, PART_UNAMBIGUOUS) ) ) ) { appendEscape(aResult, nUTF32); } else aResult.appendUtf32(nUTF32); break; } } return aResult.makeStringAndClear(); } OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { INetURLObject aTemp(*this); aTemp.clearPassword(); return aTemp.GetMainURL(eMechanism, eCharset); } OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { INetURLObject aTemp(*this); aTemp.clearFragment(); return aTemp.GetMainURL(eMechanism, eCharset); } OUString INetURLObject::getAbbreviated( uno::Reference< util::XStringWidth > const & rStringWidth, sal_Int32 nWidth, DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { OSL_ENSURE(rStringWidth.is(), "specification violation"); OUStringBuffer aBuffer; // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme // is empty ("") in that case, so take the scheme from m_aAbsURIRef if (m_eScheme != INetProtocol::Generic) { aBuffer.append(getSchemeInfo().m_sScheme.asView()); } else { if (!m_aAbsURIRef.isEmpty()) { sal_Unicode const * pSchemeBegin = m_aAbsURIRef.getStr(); sal_Unicode const * pSchemeEnd = pSchemeBegin; while (pSchemeEnd[0] != ':') { ++pSchemeEnd; } aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin); } } aBuffer.append(':'); bool bAuthority = getSchemeInfo().m_bAuthority; sal_Unicode const * pCoreBegin = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() : m_aPath.getBegin()); sal_Unicode const * pCoreEnd = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength(); bool bSegment = false; if (getSchemeInfo().m_bHierarchical) { OUString aRest; if (m_aQuery.isPresent()) aRest = "?..."; else if (m_aFragment.isPresent()) aRest = "#..."; OUStringBuffer aTrailer; sal_Unicode const * pBegin = pCoreBegin; sal_Unicode const * pEnd = pCoreEnd; sal_Unicode const * pPrefixBegin = pBegin; sal_Unicode const * pSuffixEnd = pEnd; bool bPrefix = true; bool bSuffix = true; do { if (bSuffix) { sal_Unicode const * p = pSuffixEnd - 1; if (pSuffixEnd == pCoreEnd && *p == '/') --p; while (*p != '/') --p; if (bAuthority && p == pCoreBegin + 1) --p; OUString aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ? 1 : 0), pSuffixEnd, eMechanism, eCharset)); pSuffixEnd = p; OUStringBuffer aResult(aBuffer); if (pSuffixEnd != pBegin) aResult.append("..."); aResult.append(aSegment); aResult.append(aTrailer); aResult.append(aRest); if (rStringWidth-> queryStringWidth(aResult.makeStringAndClear()) <= nWidth) { aTrailer.insert(0, aSegment); bSegment = true; pEnd = pSuffixEnd; } else bSuffix = false; if (pPrefixBegin > pSuffixEnd) pPrefixBegin = pSuffixEnd; if (pBegin == pEnd) break; } if (bPrefix) { sal_Unicode const * p = pPrefixBegin + (bAuthority && pPrefixBegin == pCoreBegin ? 2 : 1); OSL_ASSERT(p <= pEnd); while (p < pEnd && *p != '/') ++p; if (p == pCoreEnd - 1 && *p == '/') ++p; OUString aSegment(decode(pPrefixBegin + (pPrefixBegin == pCoreBegin ? 0 : 1), p == pEnd ? p : p + 1, eMechanism, eCharset)); pPrefixBegin = p; OUStringBuffer aResult(aBuffer); aResult.append(aSegment); if (pPrefixBegin != pEnd) aResult.append("..."); aResult.append(aTrailer); aResult.append(aRest); if (rStringWidth-> queryStringWidth(aResult.makeStringAndClear()) <= nWidth) { aBuffer.append(aSegment); bSegment = true; pBegin = pPrefixBegin; } else bPrefix = false; if (pPrefixBegin > pSuffixEnd) pSuffixEnd = pPrefixBegin; if (pBegin == pEnd) break; } } while (bPrefix || bSuffix); if (bSegment) { if (pPrefixBegin != pBegin || pSuffixEnd != pEnd) aBuffer.append("..."); aBuffer.append(aTrailer); } } if (!bSegment) aBuffer.append(decode(pCoreBegin, pCoreEnd, eMechanism, eCharset)); if (m_aQuery.isPresent()) { aBuffer.append('?'); aBuffer.append(decode(m_aQuery, eMechanism, eCharset)); } if (m_aFragment.isPresent()) { aBuffer.append('#'); aBuffer.append(decode(m_aFragment, eMechanism, eCharset)); } if (!aBuffer.isEmpty()) { OUStringBuffer aResult(aBuffer); if (rStringWidth->queryStringWidth(aResult.makeStringAndClear()) > nWidth) for (sal_Int32 i = aBuffer.getLength();;) { if (i == 0) { aBuffer.setLength(aBuffer.getLength() - 1); if (aBuffer.isEmpty()) break; } else { aBuffer.setLength(--i); aBuffer.append("..."); } aResult = aBuffer; if (rStringWidth-> queryStringWidth(aResult.makeStringAndClear()) <= nWidth) break; } } return aBuffer.makeStringAndClear(); } bool INetURLObject::operator ==(INetURLObject const & rObject) const { if (m_eScheme != rObject.m_eScheme) return false; if (m_eScheme == INetProtocol::NotValid) return std::u16string_view(m_aAbsURIRef) == std::u16string_view(rObject.m_aAbsURIRef); if ((m_aScheme.compare( rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef) != 0) || GetUser(DecodeMechanism::NONE) != rObject.GetUser(DecodeMechanism::NONE) || GetPass(DecodeMechanism::NONE) != rObject.GetPass(DecodeMechanism::NONE) || !GetHost(DecodeMechanism::NONE).equalsIgnoreAsciiCase( rObject.GetHost(DecodeMechanism::NONE)) || GetPort() != rObject.GetPort() || HasParam() != rObject.HasParam() || GetParam() != rObject.GetParam()) return false; OUString aPath1(GetURLPath(DecodeMechanism::NONE)); OUString aPath2(rObject.GetURLPath(DecodeMechanism::NONE)); switch (m_eScheme) { case INetProtocol::File: { // If the URL paths of two file URLs only differ in that one has a // final '/' and the other has not, take the two paths as // equivalent (this could be useful for other schemes, too): sal_Int32 nLength = aPath1.getLength(); switch (nLength - aPath2.getLength()) { case -1: if (aPath2[nLength] != '/') return false; break; case 0: break; case 1: if (aPath1[--nLength] != '/') return false; break; default: return false; } return aPath1.compareTo(aPath2, nLength) == 0; } default: return aPath1 == aPath2; } } bool INetURLObject::ConcatData(INetProtocol eTheScheme, std::u16string_view rTheUser, std::u16string_view rThePassword, std::u16string_view rTheHost, sal_uInt32 nThePort, std::u16string_view rThePath) { setInvalid(); m_eScheme = eTheScheme; if (HasError() || m_eScheme == INetProtocol::Generic) return false; m_aAbsURIRef.setLength(0); m_aAbsURIRef.append(getSchemeInfo().m_sScheme.asView()); m_aAbsURIRef.append(':'); if (getSchemeInfo().m_bAuthority) { m_aAbsURIRef.append("//"); bool bUserInfo = false; if (getSchemeInfo().m_bUser) { if (!rTheUser.empty()) { OUStringBuffer aNewUser; encodeText(aNewUser, rTheUser, PART_USER_PASSWORD, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false); m_aUser.set(m_aAbsURIRef, aNewUser, m_aAbsURIRef.getLength()); bUserInfo = true; } } else if (!rTheUser.empty()) { setInvalid(); return false; } if (!rThePassword.empty()) { if (getSchemeInfo().m_bPassword) { m_aAbsURIRef.append(':'); OUStringBuffer aNewAuth; encodeText(aNewAuth, rThePassword, PART_USER_PASSWORD, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false); m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aAbsURIRef.getLength()); bUserInfo = true; } else { setInvalid(); return false; } } if (bUserInfo && getSchemeInfo().m_bHost) m_aAbsURIRef.append('@'); if (getSchemeInfo().m_bHost) { OUStringBuffer aSynHost(rTheHost); bool bNetBiosName = false; switch (m_eScheme) { case INetProtocol::File: { if (OUString::unacquired(aSynHost).equalsIgnoreAsciiCase( "localhost" )) { aSynHost.setLength(0); } bNetBiosName = true; } break; case INetProtocol::Ldap: if (aSynHost.isEmpty() && nThePort != 0) { setInvalid(); return false; } break; default: if (aSynHost.isEmpty()) { setInvalid(); return false; } break; } if (!parseHostOrNetBiosName( aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(), EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, bNetBiosName, &aSynHost)) { setInvalid(); return false; } m_aHost.set(m_aAbsURIRef, aSynHost, m_aAbsURIRef.getLength()); if (nThePort != 0) { if (getSchemeInfo().m_bPort) { m_aAbsURIRef.append(':'); m_aPort.set(m_aAbsURIRef, OUString::number(nThePort), m_aAbsURIRef.getLength()); } else { setInvalid(); return false; } } } else if (!rTheHost.empty() || nThePort != 0) { setInvalid(); return false; } } OUStringBuffer aSynPath(256); sal_Unicode const * p = rThePath.data(); sal_Unicode const * pEnd = p + rThePath.size(); if (!parsePath(m_eScheme, &p, pEnd, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false, '/', 0x80000000, 0x80000000, 0x80000000, aSynPath) || p != pEnd) { setInvalid(); return false; } m_aPath.set(m_aAbsURIRef, aSynPath, m_aAbsURIRef.getLength()); return true; } // static OUString INetURLObject::GetAbsURL(std::u16string_view rTheBaseURIRef, OUString const & rTheRelURIRef, EncodeMechanism eEncodeMechanism, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset) { // Backwards compatibility: if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#') return rTheRelURIRef; INetURLObject aTheAbsURIRef; bool bWasAbsolute; return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset). convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute, eEncodeMechanism, eCharset, false, false, false, FSysStyle::Detect) || eEncodeMechanism != EncodeMechanism::WasEncoded || eDecodeMechanism != DecodeMechanism::ToIUri || eCharset != RTL_TEXTENCODING_UTF8 ? aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) : rTheRelURIRef; } OUString INetURLObject::getExternalURL() const { OUString aTheExtURIRef; translateToExternal( m_aAbsURIRef, aTheExtURIRef); return aTheExtURIRef; } bool INetURLObject::isSchemeEqualTo(std::u16string_view scheme) const { return m_aScheme.isPresent() && (rtl_ustr_compareIgnoreAsciiCase_WithLength( scheme.data(), scheme.size(), m_aAbsURIRef.getStr() + m_aScheme.getBegin(), m_aScheme.getLength()) == 0); } bool INetURLObject::isAnyKnownWebDAVScheme() const { return ( isSchemeEqualTo( INetProtocol::Http ) || isSchemeEqualTo( INetProtocol::Https ) || isSchemeEqualTo( INetProtocol::VndSunStarWebdav ) || isSchemeEqualTo( u"vnd.sun.star.webdavs" ) || isSchemeEqualTo( u"webdav" ) || isSchemeEqualTo( u"webdavs" )); } // static OUString INetURLObject::GetScheme(INetProtocol eTheScheme) { return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix); } // static const OUString & INetURLObject::GetSchemeName(INetProtocol eTheScheme) { return getSchemeInfo(eTheScheme).m_sScheme; } // static INetProtocol INetURLObject::CompareProtocolScheme(std::u16string_view aTheAbsURIRef) { sal_Unicode const * p = aTheAbsURIRef.data(); PrefixInfo const * pPrefix = getPrefix(p, p + aTheAbsURIRef.size()); return pPrefix ? pPrefix->m_eScheme : INetProtocol::NotValid; } OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and // PROT_VND_SUN_STAR_PKG misuse m_aHost: if (!getSchemeInfo().m_bHost) return OUString(); OUStringBuffer aHostPort(decode(m_aHost, eMechanism, eCharset)); if (m_aPort.isPresent()) { aHostPort.append(':'); aHostPort.append(decode(m_aPort, eMechanism, eCharset)); } return aHostPort.makeStringAndClear(); } sal_uInt32 INetURLObject::GetPort() const { if (m_aPort.isPresent()) { sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin(); sal_Unicode const * pEnd = p + m_aPort.getLength(); sal_uInt32 nThePort; if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd) return nThePort; } return 0; } bool INetURLObject::SetPort(sal_uInt32 nThePort) { if (getSchemeInfo().m_bPort && m_aHost.isPresent()) { sal_Int32 nDelta; if (m_aPort.isPresent()) nDelta = m_aPort.set(m_aAbsURIRef, OUString::number(nThePort)); else { m_aAbsURIRef.insert(m_aHost.getEnd(), u':'); nDelta = m_aPort.set(m_aAbsURIRef, OUString::number(nThePort), m_aHost.getEnd() + 1) + 1; } m_aPath += nDelta; m_aQuery += nDelta; m_aFragment += nDelta; return true; } return false; } sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const { if (!checkHierarchical()) return 0; sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pEnd = p + m_aPath.getLength(); if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/') --pEnd; sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1; while (p != pEnd) if (*p++ == '/') ++n; return n; } bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash) { SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); if (!aSegment.isPresent()) return false; OUStringBuffer aNewPath(m_aPath.getLength()); aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(), aSegment.getBegin() - m_aPath.getBegin()); if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd()) aNewPath.append('/'); else aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(), m_aPath.getEnd() - aSegment.getEnd()); if (aNewPath.isEmpty() && !aSegment.isEmpty() && m_aAbsURIRef[aSegment.getBegin()] == '/') { aNewPath.append('/'); } return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash, DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); if (!aSegment.isPresent()) return OUString(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * p = pSegBegin; while (p != pSegEnd && *p != ';') ++p; return decode(pSegBegin, p, eMechanism, eCharset); } bool INetURLObject::setName(std::u16string_view rTheName, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { SubString aSegment(getSegment(LAST_SEGMENT, true)); if (!aSegment.isPresent()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * p = pSegBegin; while (p != pSegEnd && *p != ';') ++p; OUStringBuffer aNewPath(256); aNewPath.append(std::u16string_view(pPathBegin, pSegBegin - pPathBegin)); encodeText(aNewPath, rTheName, PART_PCHAR, eMechanism, eCharset, true); aNewPath.append(std::u16string_view(p, pPathEnd - p)); return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } bool INetURLObject::hasExtension() const { SubString aSegment(getSegment(LAST_SEGMENT, true/*bIgnoreFinalSlash*/)); if (!aSegment.isPresent()) return false; sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p) if (*p == '.' && p != pSegBegin) return true; return false; } OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash, DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); if (!aSegment.isPresent()) return OUString(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * pExtension = nullptr; sal_Unicode const * p = pSegBegin; for (; p != pSegEnd && *p != ';'; ++p) if (*p == '.' && p != pSegBegin) pExtension = p; if (!pExtension) pExtension = p; return decode(pSegBegin, pExtension, eMechanism, eCharset); } bool INetURLObject::setBase(std::u16string_view rTheBase, sal_Int32 nIndex, EncodeMechanism eMechanism, rtl_TextEncoding eCharset) { SubString aSegment(getSegment(nIndex, true/*bIgnoreFinalSlash*/)); if (!aSegment.isPresent()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * pExtension = nullptr; sal_Unicode const * p = pSegBegin; for (; p != pSegEnd && *p != ';'; ++p) if (*p == '.' && p != pSegBegin) pExtension = p; if (!pExtension) pExtension = p; OUStringBuffer aNewPath(256); aNewPath.append(std::u16string_view(pPathBegin, pSegBegin - pPathBegin)); encodeText(aNewPath, rTheBase, PART_PCHAR, eMechanism, eCharset, true); aNewPath.append(std::u16string_view(pExtension, pPathEnd - pExtension)); return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } OUString INetURLObject::getExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash, DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); if (!aSegment.isPresent()) return OUString(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * pExtension = nullptr; sal_Unicode const * p = pSegBegin; for (; p != pSegEnd && *p != ';'; ++p) if (*p == '.' && p != pSegBegin) pExtension = p; if (!pExtension) return OUString(); return decode(pExtension + 1, p, eMechanism, eCharset); } bool INetURLObject::setExtension(std::u16string_view rTheExtension, sal_Int32 nIndex, bool bIgnoreFinalSlash, rtl_TextEncoding eCharset) { SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); if (!aSegment.isPresent()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * pExtension = nullptr; sal_Unicode const * p = pSegBegin; for (; p != pSegEnd && *p != ';'; ++p) if (*p == '.' && p != pSegBegin) pExtension = p; if (!pExtension) pExtension = p; OUStringBuffer aNewPath(256); aNewPath.append(OUString::Concat(std::u16string_view(pPathBegin, pExtension - pPathBegin)) + "."); encodeText(aNewPath, rTheExtension, PART_PCHAR, EncodeMechanism::WasEncoded, eCharset, true); aNewPath.append(std::u16string_view(p, pPathEnd - p)); return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash) { SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); if (!aSegment.isPresent()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); sal_Unicode const * pSegBegin = m_aAbsURIRef.getStr() + aSegment.getBegin(); sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); if (pSegBegin < pSegEnd && *pSegBegin == '/') ++pSegBegin; sal_Unicode const * pExtension = nullptr; sal_Unicode const * p = pSegBegin; for (; p != pSegEnd && *p != ';'; ++p) if (*p == '.' && p != pSegBegin) pExtension = p; if (!pExtension) return true; OUString aNewPath = OUString::Concat(std::u16string_view(pPathBegin, pExtension - pPathBegin)) + std::u16string_view(p, pPathEnd - p); return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } bool INetURLObject::hasFinalSlash() const { if (!checkHierarchical()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); return pPathEnd > pPathBegin && pPathEnd[-1] == '/'; } bool INetURLObject::setFinalSlash() { if (!checkHierarchical()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); if (pPathEnd > pPathBegin && pPathEnd[-1] == '/') return true; OUString aNewPath = OUString::Concat(std::u16string_view(pPathBegin, pPathEnd - pPathBegin)) + "/"; return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } bool INetURLObject::removeFinalSlash() { if (!checkHierarchical()) return false; sal_Unicode const * pPathBegin = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/') return true; --pPathEnd; if (pPathEnd == pPathBegin && *pPathBegin == '/') return false; OUString aNewPath(pPathBegin, pPathEnd - pPathBegin); return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8); } OUString INetURLObject::getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter) const { if (m_eScheme != INetProtocol::File) return OUString(); if (((eStyle & FSysStyle::Vos) ? 1 : 0) + ((eStyle & FSysStyle::Unix) ? 1 : 0) + ((eStyle & FSysStyle::Dos) ? 1 : 0) > 1) { if(eStyle & FSysStyle::Vos && m_aHost.isPresent() && m_aHost.getLength() > 0) { eStyle= FSysStyle::Vos; } else { if(hasDosVolume(eStyle) || ((eStyle & FSysStyle::Dos) && m_aHost.isPresent() && m_aHost.getLength() > 0)) { eStyle = FSysStyle::Dos; } else { if(eStyle & FSysStyle::Unix && (!m_aHost.isPresent() || m_aHost.getLength() == 0)) { eStyle = FSysStyle::Unix; } else { eStyle= FSysStyle(0); } } } } switch (eStyle) { case FSysStyle::Vos: { if (pDelimiter) *pDelimiter = '/'; OUStringBuffer aSynFSysPath; aSynFSysPath.append("//"); if (m_aHost.isPresent() && m_aHost.getLength() > 0) aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8)); else aSynFSysPath.append('.'); aSynFSysPath.append(decode(m_aPath, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8)); return aSynFSysPath.makeStringAndClear(); } case FSysStyle::Unix: { if (m_aHost.isPresent() && m_aHost.getLength() > 0) return OUString(); if (pDelimiter) *pDelimiter = '/'; return decode(m_aPath, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8); } case FSysStyle::Dos: { if (pDelimiter) *pDelimiter = '\\'; OUStringBuffer aSynFSysPath(64); if (m_aHost.isPresent() && m_aHost.getLength() > 0) { aSynFSysPath.append("\\\\"); aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8)); aSynFSysPath.append('\\'); } sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); sal_Unicode const * pEnd = p + m_aPath.getLength(); DBG_ASSERT(p < pEnd && *p == '/', "INetURLObject::getFSysPath(): Bad path"); ++p; while (p < pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, eEscapeType); if (eEscapeType == EscapeType::NONE && nUTF32 == '/') aSynFSysPath.append('\\'); else aSynFSysPath.appendUtf32(nUTF32); } return aSynFSysPath.makeStringAndClear(); } default: return OUString(); } } // static void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4) { DBG_ASSERT(nUCS4 < 0x80000000, "INetURLObject::appendUCS4Escape(): Bad char"); if (nUCS4 < 0x80) appendEscape(rTheText, nUCS4); else if (nUCS4 < 0x800) { appendEscape(rTheText, nUCS4 >> 6 | 0xC0); appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80); } else if (nUCS4 < 0x10000) { appendEscape(rTheText, nUCS4 >> 12 | 0xE0); appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80); } else if (nUCS4 < 0x200000) { appendEscape(rTheText, nUCS4 >> 18 | 0xF0); appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80); } else if (nUCS4 < 0x4000000) { appendEscape(rTheText, nUCS4 >> 24 | 0xF8); appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80); } else { appendEscape(rTheText, nUCS4 >> 30 | 0xFC); appendEscape(rTheText, (nUCS4 >> 24 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80); appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80); } } // static void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4, EscapeType eEscapeType, Part ePart, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes) { bool bEscape; rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW; switch (eEscapeType) { case EscapeType::NONE: if (mustEncode(nUCS4, ePart)) { bEscape = true; eTargetCharset = RTL_TEXTENCODING_UTF8; } else bEscape = false; break; case EscapeType::Octet: bEscape = true; eTargetCharset = RTL_TEXTENCODING_ISO_8859_1; break; case EscapeType::Utf32: if (mustEncode(nUCS4, ePart)) { bEscape = true; eTargetCharset = eCharset; } else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4)) { bEscape = true; eTargetCharset = RTL_TEXTENCODING_ASCII_US; } else bEscape = false; break; default: bEscape = false; } if (bEscape) { switch (eTargetCharset) { default: OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset"); [[fallthrough]]; case RTL_TEXTENCODING_ASCII_US: case RTL_TEXTENCODING_ISO_8859_1: appendEscape(rTheText, nUCS4); break; case RTL_TEXTENCODING_UTF8: appendUCS4Escape(rTheText, nUCS4); break; } } else rTheText.append(sal_Unicode(nUCS4)); } // static sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin, sal_Unicode const * pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, EscapeType & rEscapeType) { DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence"); sal_uInt32 nUTF32 = INetMIME::getUTF32Character(rBegin, pEnd); switch (eMechanism) { case EncodeMechanism::All: rEscapeType = EscapeType::NONE; break; case EncodeMechanism::WasEncoded: { int nWeight1; int nWeight2; if (nUTF32 == static_cast('%') && rBegin + 1 < pEnd && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0) { rBegin += 2; nUTF32 = nWeight1 << 4 | nWeight2; switch (eCharset) { default: OSL_FAIL( "INetURLObject::getUTF32(): Unsupported charset"); [[fallthrough]]; case RTL_TEXTENCODING_ASCII_US: rEscapeType = rtl::isAscii(nUTF32) ? EscapeType::Utf32 : EscapeType::Octet; break; case RTL_TEXTENCODING_ISO_8859_1: rEscapeType = EscapeType::Utf32; break; case RTL_TEXTENCODING_UTF8: if (rtl::isAscii(nUTF32)) rEscapeType = EscapeType::Utf32; else { if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4) { sal_uInt32 nEncoded; int nShift; sal_uInt32 nMin; if (nUTF32 <= 0xDF) { nEncoded = (nUTF32 & 0x1F) << 6; nShift = 0; nMin = 0x80; } else if (nUTF32 <= 0xEF) { nEncoded = (nUTF32 & 0x0F) << 12; nShift = 6; nMin = 0x800; } else { nEncoded = (nUTF32 & 0x07) << 18; nShift = 12; nMin = 0x10000; } sal_Unicode const * p = rBegin; bool bUTF8 = true; for (;;) { if (pEnd - p < 3 || p[0] != '%' || (nWeight1 = INetMIME::getHexWeight(p[1])) < 8 || nWeight1 > 11 || (nWeight2 = INetMIME::getHexWeight(p[2])) < 0) { bUTF8 = false; break; } p += 3; nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; if (nShift == 0) break; nShift -= 6; } if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded) && nEncoded >= nMin) { rBegin = p; nUTF32 = nEncoded; rEscapeType = EscapeType::Utf32; break; } } rEscapeType = EscapeType::Octet; } break; } } else rEscapeType = EscapeType::NONE; break; } case EncodeMechanism::NotCanonical: { int nWeight1; int nWeight2; if (nUTF32 == static_cast('%') && rBegin + 1 < pEnd && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0) && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)) { rBegin += 2; nUTF32 = nWeight1 << 4 | nWeight2; rEscapeType = EscapeType::Octet; } else rEscapeType = EscapeType::NONE; break; } } return nUTF32; } // static sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin, sal_Unicode const * pEnd, bool bEager) { enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN }; State eState = STATE_DOT; sal_Int32 nLabels = 0; sal_Unicode const * pLastAlphanumeric = nullptr; for (sal_Unicode const * p = rBegin;; ++p) switch (eState) { case STATE_DOT: if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_')) { ++nLabels; eState = STATE_LABEL; break; } if (bEager || nLabels == 0) return 0; rBegin = p - 1; return nLabels; case STATE_LABEL: if (p != pEnd) { if (rtl::isAsciiAlphanumeric(*p) || *p == '_') break; else if (*p == '.') { eState = STATE_DOT; break; } else if (*p == '-') { pLastAlphanumeric = p; eState = STATE_HYPHEN; break; } } rBegin = p; return nLabels; case STATE_HYPHEN: if (p != pEnd) { if (rtl::isAsciiAlphanumeric(*p) || *p == '_') { eState = STATE_LABEL; break; } else if (*p == '-') break; } if (bEager) return 0; rBegin = pLastAlphanumeric; return nLabels; } } // static bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin, sal_Unicode const * pEnd) { if (rBegin != pEnd && *rBegin == '[') { sal_Unicode const * p = rBegin + 1; //TODO: check for valid IPv6address (RFC 2373): while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.')) { ++p; } if (p != pEnd && *p == ']') { rBegin = p + 1; return true; } } return false; } OUString INetURLObject::GetPartBeforeLastName() const { if (!checkHierarchical()) return OUString(); INetURLObject aTemp(*this); aTemp.clearFragment(); aTemp.clearQuery(); aTemp.removeSegment(LAST_SEGMENT, false); aTemp.setFinalSlash(); return aTemp.GetMainURL(DecodeMechanism::ToIUri); } OUString INetURLObject::GetLastName(DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { return getName(LAST_SEGMENT, true, eMechanism, eCharset); } OUString INetURLObject::GetFileExtension() const { return getExtension(LAST_SEGMENT, false); } void INetURLObject::CutLastName() { INetURLObject aTemp(*this); aTemp.clearFragment(); aTemp.clearQuery(); if (!aTemp.removeSegment(LAST_SEGMENT, false)) return; *this = aTemp; } OUString INetURLObject::PathToFileName() const { if (m_eScheme != INetProtocol::File) return OUString(); OUString aSystemPath; if (osl::FileBase::getSystemPathFromFileURL( decode(m_aAbsURIRef.getStr(), m_aAbsURIRef.getStr() + m_aPath.getEnd(), DecodeMechanism::NONE, RTL_TEXTENCODING_UTF8), aSystemPath) != osl::FileBase::E_None) return OUString(); return aSystemPath; } OUString INetURLObject::GetFull() const { INetURLObject aTemp(*this); aTemp.removeFinalSlash(); return aTemp.PathToFileName(); } OUString INetURLObject::GetPath() const { INetURLObject aTemp(*this); aTemp.removeSegment(); aTemp.removeFinalSlash(); return aTemp.PathToFileName(); } void INetURLObject::SetBase(std::u16string_view rTheBase) { setBase(rTheBase, LAST_SEGMENT, EncodeMechanism::All); } OUString INetURLObject::GetBase() const { return getBase(LAST_SEGMENT, true, DecodeMechanism::WithCharset); } void INetURLObject::SetExtension(std::u16string_view rTheExtension) { setExtension(rTheExtension, LAST_SEGMENT, false); } OUString INetURLObject::CutExtension() { OUString aTheExtension(getExtension(LAST_SEGMENT, false)); return removeExtension(LAST_SEGMENT, false) ? aTheExtension : OUString(); } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */