diff options
Diffstat (limited to 'include/tools/urlobj.hxx')
-rw-r--r-- | include/tools/urlobj.hxx | 1305 |
1 files changed, 1305 insertions, 0 deletions
diff --git a/include/tools/urlobj.hxx b/include/tools/urlobj.hxx new file mode 100644 index 0000000000..7e7c4a2aaf --- /dev/null +++ b/include/tools/urlobj.hxx @@ -0,0 +1,1305 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#ifndef INCLUDED_TOOLS_URLOBJ_HXX +#define INCLUDED_TOOLS_URLOBJ_HXX + +#include <tools/toolsdllapi.h> +#include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> +#include <sal/types.h> +#include <o3tl/typed_flags_set.hxx> + +#include <memory> +#include <string_view> + +class SvMemoryStream; + +namespace com::sun::star::util { + class XStringWidth; +} + +namespace com::sun::star::uno { template <typename > class Reference; } + +// Common URL prefixes for various schemes: +inline constexpr OUString INET_FTP_SCHEME = u"ftp://"_ustr; +inline constexpr OUString INET_HTTP_SCHEME = u"http://"_ustr; +inline constexpr OUString INET_HTTPS_SCHEME = u"https://"_ustr; +inline constexpr OUString INET_FILE_SCHEME = u"file://"_ustr; +inline constexpr OUString INET_MAILTO_SCHEME = u"mailto:"_ustr; +inline constexpr OUString INET_HID_SCHEME = u"hid:"_ustr; + +#define URL_PREFIX_PRIV_SOFFICE "private:" + +// Schemes: +enum class INetProtocol +{ + NotValid, + Ftp, + Http, + File, + Mailto, + VndSunStarWebdav, + PrivSoffice, + VndSunStarHelp, + Https, + Slot, + Macro, + Javascript, + Data, + Cid, + VndSunStarHier, + Uno, + Component, + VndSunStarPkg, + Ldap, + Db, + VndSunStarCmd, + Telnet, + VndSunStarExpand, + VndSunStarTdoc, + Generic, + Smb, + Hid, + Sftp, + Cmis, + LAST = Cmis +}; + +/** The supported notations for file system paths. + */ +enum class FSysStyle +{ + /** VOS notation (e.g., "//server/dir/file"). + */ + Vos = 0x1, + + /** Unix notation (e.g., "/dir/file"). + */ + Unix = 0x2, + + /** DOS notation (e.g., "a:\dir\file" and "\\server\dir\file"). + */ + Dos = 0x4, + + /** Detect the used notation. + + @descr For the following descriptions, please note that + whereas FSYS_DEFAULT includes all style bits, combinations of only + a few style bits are also possible, and are also described. + + @descr When used to translate a file system path to a file URL, + the subset of the following productions for which the appropriate + style bit is set are checked in order (using the conventions of + RFC 2234, RFC 2396, and RFC 2732; UCS4 stands for any UCS4 + character): + + Production T1 (VOS local; FSysStyle::Vos only): + "//." ["/" *UCS4] + becomes + "file:///" *UCS4 + + Production T2 (VOS host; FSysStyle::Vos only): + "//" [host] ["/" *UCS4] + becomes + "file://" host "/" *UCS4 + + Production T3 (UNC; FSysStyle::Dos only): + "\\" [host] ["\" *UCS4] + becomes + "file://" host "/" *UCS4 + replacing "\" by "/" within <*UCS4> + + Production T4 (Unix-like DOS; FSysStyle::Dos only): + ALPHA ":" ["/" *UCS4] + becomes + "file:///" ALPHA ":/" *UCS4 + replacing "\" by "/" within <*UCS4> + + Production T5 (DOS; FSysStyle::Dos only): + ALPHA ":" ["\" *UCS4] + becomes + "file:///" ALPHA ":/" *UCS4 + replacing "\" by "/" within <*UCS4> + + Production T6 (any): + *UCS4 + becomes + "file:///" *UCS4 + replacing the delimiter by "/" within <*UCS4>. The delimiter is + that character from the set { "/", "\" } which appears most + often in <*UCS4> (if FSysStyle::Unix is not among the style bits, "/" + is removed from the set; if FSysStyle::Dos is not among the style + bits, "\" is removed from the set). If two or more + characters appear the same number of times, the character + mentioned first in that set is chosen. If the first character + of <*UCS4> is the delimiter, that character is not copied. + + @descr When used to translate a file URL to a file system path, + the following productions are checked in order (using the + conventions of RFC 2234, RFC 2396, and RFC 2732): + + Production F1 (VOS; FSysStyle::Vos): + "file://" host "/" fpath ["#" fragment] + becomes + "//" host "/" fpath + + Production F2 (DOS; FSysStyle::Dos): + "file:///" ALPHA ":" ["/" fpath] ["#" fragment] + becomes + ALPHA ":" ["\" fpath] + replacing "/" by "\" in <fpath> + + Production F3 (Unix; FSysStyle::Unix): + "file:///" fpath ["#" fragment] + becomes + "/" fpath + */ + Detect = Vos | Unix | Dos +}; +namespace o3tl { + template<> struct typed_flags<FSysStyle> : is_typed_flags<FSysStyle, 0x07> {}; +} + +class SAL_WARN_UNUSED TOOLS_DLLPUBLIC INetURLObject +{ +public: + // Get- and Set-Methods: + + /** The way input strings that represent (parts of) URIs are interpreted + in set-methods. + + @descr UTF-32 characters in the range 0x80--0x10FFFF are replaced by + sequences of escape sequences, representing the UTF-8 coded characters. + + @descr Along with an EncodeMechanism parameter, the set-methods all + take an rtl_TextEncoding parameter, which is ignored unless the + EncodeMechanism is EncodeMechanism::WasEncoded. + */ + enum class EncodeMechanism + { + /** All escape sequences that are already present are ignored, and are + interpreted as literal sequences of three characters. + */ + All, + + /** Sequences of escape sequences, that represent characters from the + specified character set and that can be converted to UTF-32 + characters, are first decoded. If they have to be encoded, they + are converted to UTF-8 characters and are than translated into + (sequences of) escape sequences. Other escape sequences are + copied verbatim (but using upper case hex digits). + */ + WasEncoded, + + /** All escape sequences that are already present are copied verbatim + (but using upper case hex digits). + */ + NotCanonical + }; + + /** The way strings that represent (parts of) URIs are returned from get- + methods. + + @descr Along with a DecodeMechanism parameter, the get-methods all + take an rtl_TextEncoding parameter, which is ignored unless the + DecodeMechanism is DecodeMechanism::WithCharset or DecodeMechanism::Unambiguous. + */ + enum class DecodeMechanism + { + /** The (part of the) URI is returned unchanged. Since URIs are + written using a subset of US-ASCII, the returned string is + guaranteed to contain only US-ASCII characters. + */ + NONE, + + /** All sequences of escape sequences that represent UTF-8 coded + UTF-32 characters with a numerical value greater than 0x7F, are + replaced by the respective UTF-16 characters. All other escape + sequences are not decoded. + */ + ToIUri, + + /** All (sequences of) escape sequences that represent characters from + the specified character set, and that can be converted to UTF-32, + are replaced by the respective UTF-16 characters. All other + escape sequences are not decoded. + */ + WithCharset, + + /** All (sequences of) escape sequences that represent characters from + the specified character set, that can be converted to UTF-32, and + that (in the case of ASCII characters) can safely be decoded + without altering the meaning of the (part of the) URI, are + replaced by the respective UTF-16 characters. All other escape + sequences are not decoded. + */ + Unambiguous + }; + + // General Structure: + + INetURLObject(): + m_aAbsURIRef(256), m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) {} + + bool HasError() const { return m_eScheme == INetProtocol::NotValid; } + + OUString GetMainURL(DecodeMechanism eMechanism, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aAbsURIRef, eMechanism, eCharset); } + + OUString GetURLNoPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + OUString GetURLNoMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + OUString + getAbbreviated(css::uno::Reference< css::util::XStringWidth > const & rStringWidth, + sal_Int32 nWidth, + DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + bool operator ==(INetURLObject const & rObject) const; + + bool operator !=(INetURLObject const & rObject) const + { return !(*this == rObject); } + + // Strict Parsing: + + inline explicit INetURLObject( + std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + inline bool SetURL(std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + bool ConcatData(INetProtocol eTheScheme, std::u16string_view rTheUser, + std::u16string_view rThePassword, + std::u16string_view rTheHost, sal_uInt32 nThePort, + std::u16string_view rThePath); + + // Smart Parsing: + + inline INetURLObject(std::u16string_view rTheAbsURIRef, + INetProtocol eTheSmartScheme, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8, + FSysStyle eStyle = FSysStyle::Detect); + + void SetSmartProtocol(INetProtocol eTheSmartScheme) + { m_eSmartScheme = eTheSmartScheme; } + + inline bool + SetSmartURL(std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8, + FSysStyle eStyle = FSysStyle::Detect); + + inline INetURLObject + smartRel2Abs(OUString const & rTheRelURIRef, + bool & rWasAbsolute, + bool bIgnoreFragment = false, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8, + bool bRelativeNonURIs = false, + FSysStyle eStyle = FSysStyle::Detect) const; + + // Relative URLs: + + inline bool + GetNewAbsURL(OUString const & rTheRelURIRef, + INetURLObject * pTheAbsURIRef) + const; + + /** @descr If rTheRelURIRef cannot be converted to an absolute URL + (because of syntactic reasons), either rTheRelURIRef or an empty + string is returned: If all of the parameters eEncodeMechanism, + eDecodeMechanism and eCharset have their respective default values, + then rTheRelURIRef is returned unmodified; otherwise, an empty string + is returned. + */ + static OUString + GetAbsURL(std::u16string_view rTheBaseURIRef, + OUString const & rTheRelURIRef, + EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded, + DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + static inline OUString + GetRelURL(std::u16string_view rTheBaseURIRef, + OUString const & rTheAbsURIRef, + EncodeMechanism eEncodeMechanism = EncodeMechanism::WasEncoded, + DecodeMechanism eDecodeMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8, + FSysStyle eStyle = FSysStyle::Detect); + + // External URLs: + + OUString getExternalURL() const; + + static inline bool translateToExternal(std::u16string_view rTheIntURIRef, + OUString & rTheExtURIRef, + DecodeMechanism eDecodeMechanism + = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8); + + static inline bool translateToInternal(std::u16string_view rTheExtURIRef, + OUString & rTheIntURIRef, + DecodeMechanism eDecodeMechanism + = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8); + + // Scheme: + + struct SchemeInfo; + + INetProtocol GetProtocol() const { return m_eScheme; } + + bool isSchemeEqualTo(INetProtocol scheme) const { return scheme == m_eScheme; } + + bool isSchemeEqualTo(std::u16string_view scheme) const; + + /** Check if the scheme is one of the WebDAV scheme + * we know about. + * + * @return true is one other scheme either public scheme or private scheme. + */ + bool isAnyKnownWebDAVScheme() const; + + /** Return the URL 'prefix' for a given scheme. + + @param eTheScheme One of the supported URL schemes. + + @return The 'prefix' of URLs of the given scheme. + */ + static OUString GetScheme(INetProtocol eTheScheme); + + /** Return the human-readable name for a given scheme. + + @param eTheScheme One of the supported URL schemes. + + @return The protocol name of URLs of the given scheme. + */ + static const OUString & GetSchemeName(INetProtocol eTheScheme); + + static INetProtocol CompareProtocolScheme(std::u16string_view aTheAbsURIRef); + + // User Info: + + bool HasUserData() const { return m_aUser.isPresent(); } + + OUString GetUser(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aUser, eMechanism, eCharset); } + + OUString GetPass(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aAuth, eMechanism, eCharset); } + + bool SetUser(std::u16string_view rTheUser) + { return setUser(rTheUser, RTL_TEXTENCODING_UTF8); } + + inline bool SetPass(std::u16string_view rThePassword); + + // Host and Port: + + bool HasPort() const { return m_aPort.isPresent(); } + + OUString GetHost(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aHost, eMechanism, eCharset); } + + OUString GetHostPort(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) const; + + sal_uInt32 GetPort() const; + + bool SetHost(std::u16string_view rTheHost) + { return setHost(rTheHost, RTL_TEXTENCODING_UTF8); } + + bool SetPort(sal_uInt32 nThePort); + + // Path: + + bool HasURLPath() const { return !m_aPath.isEmpty(); } + + OUString GetURLPath(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aPath, eMechanism, eCharset); } + + bool SetURLPath(std::u16string_view rThePath, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + { return setPath(rThePath, eMechanism, eCharset); } + + // Hierarchical Path: + + /** A constant to address the last segment in various methods dealing with + hierarchical paths. + + @descr It is often more efficient to address the last segment using + this constant, than to determine its ordinal value using + getSegmentCount(). + */ + enum { LAST_SEGMENT = -1 }; + + /** The number of segments in the hierarchical path. + + @descr Using RFC 2396 and RFC 2234, a hierarchical path is of the + form + + hierarchical-path = 1*("/" segment) + + segment = name *(";" param) + + name = [base ["." extension]] + + base = 1*pchar + + extension = *<any pchar except "."> + + param = *pchar + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @return The number of segments in the hierarchical path. If the path + is not hierarchical, 0 is returned. + */ + sal_Int32 getSegmentCount(bool bIgnoreFinalSlash = true) const; + + /** Remove a segment from the hierarchical path. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @return True if the segment has successfully been removed (and the + resulting URI is still valid). If the path is not hierarchical, or + the specified segment does not exist, false is returned. If false is + returned, the object is not modified. + */ + bool removeSegment(sal_Int32 nIndex = LAST_SEGMENT, + bool bIgnoreFinalSlash = true); + + /** Insert a new segment into the hierarchical path. + A final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @param rTheName The name part of the new segment. The new segment + will contain no parameters. + + @param bAppendFinalSlash If the new segment is appended at the end of + the hierarchical path, this parameter specifies whether to add a final + slash after it or not. + + @param nIndex The non-negative index of the segment before which + to insert the new segment. LAST_SEGMENT or an nIndex that equals + getSegmentCount() inserts the new segment at the end of the + hierarchical path. + + @param eMechanism See the general discussion for set-methods. + + @param eCharset See the general discussion for set-methods. + + @return True if the segment has successfully been inserted (and the + resulting URI is still valid). If the path is not hierarchical, or + the specified place to insert the new segment does not exist, false is + returned. If false is returned, the object is not modified. + */ + bool insertName(std::u16string_view rTheName, + bool bAppendFinalSlash = false, + sal_Int32 nIndex = LAST_SEGMENT, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + /** Get the name of a segment of the hierarchical path. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @param eMechanism See the general discussion for get-methods. + + @param eCharset See the general discussion for get-methods. + + @return The name part of the specified segment. If the path is not + hierarchical, or the specified segment does not exits, an empty string + is returned. + */ + OUString getName(sal_Int32 nIndex = LAST_SEGMENT, + bool bIgnoreFinalSlash = true, + DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + /** Set the name of the last segment (preserving any parameters and any query or + fragment part). + + @param rTheName The new name. + + @param eMechanism See the general discussion for get-methods. + + @param eCharset See the general discussion for get-methods. + + @return True if the name has successfully been modified (and the + resulting URI is still valid). If the path is not hierarchical, or + a last segment does not exist, false is returned. If false is + returned, the object is not modified. + */ + bool setName(std::u16string_view rTheName, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + /** Get the base of the name of a segment. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @param eMechanism See the general discussion for get-methods. + + @param eCharset See the general discussion for get-methods. + + @return The base part of the specified segment. If the path is + not hierarchical, or the specified segment does not exits, an empty + string is returned. + */ + OUString getBase(sal_Int32 nIndex = LAST_SEGMENT, + bool bIgnoreFinalSlash = true, + DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + /** Set the base of the name of a segment (preserving the extension). + A final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @param rTheBase The new base. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param eMechanism See the general discussion for set-methods. + + @param eCharset See the general discussion for set-methods. + + @return True if the base has successfully been modified (and the + resulting URI is still valid). If the path is not hierarchical, or + the specified segment does not exist, false is returned. If false is + returned, the object is not modified. + */ + bool setBase(std::u16string_view rTheBase, + sal_Int32 nIndex = LAST_SEGMENT, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + /** Determine whether the name of the last segment has an extension. + + @return True if the name of the specified segment has an extension. + If the path is not hierarchical, or the specified segment does not + exist, false is returned. + */ + bool hasExtension() const; + + /** Get the extension of the name of a segment. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @param eMechanism See the general discussion for get-methods. + + @param eCharset See the general discussion for get-methods. + + @return The extension part of the specified segment. If the path is + not hierarchical, or the specified segment does not exits, an empty + string is returned. + */ + OUString getExtension(sal_Int32 nIndex = LAST_SEGMENT, + bool bIgnoreFinalSlash = true, + DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + /** Set the extension of the name of a segment (replacing an already + existing extension). + + @param rTheExtension The new extension. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @param eCharset See the general discussion for set-methods. + + @return True if the extension has successfully been modified (and the + resulting URI is still valid). If the path is not hierarchical, or + the specified segment does not exist, false is returned. If false is + returned, the object is not modified. + */ + bool setExtension(std::u16string_view rTheExtension, + sal_Int32 nIndex = LAST_SEGMENT, + bool bIgnoreFinalSlash = true, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + /** Remove the extension of the name of a segment. + + @param nIndex The non-negative index of the segment, or LAST_SEGMENT + if addressing the last segment. + + @param bIgnoreFinalSlash If true, a final slash at the end of the + hierarchical path does not denote an empty segment, but is ignored. + + @return True if the extension has successfully been removed (and the + resulting URI is still valid), or if the name did not have an + extension. If the path is not hierarchical, or the specified segment + does not exist, false is returned. If false is returned, the object + is not modified. + */ + bool removeExtension(sal_Int32 nIndex = LAST_SEGMENT, + bool bIgnoreFinalSlash = true); + + /** Determine whether the hierarchical path ends in a final slash. + + @return True if the hierarchical path ends in a final slash. If the + path is not hierarchical, false is returned. + */ + bool hasFinalSlash() const; + + /** Make the hierarchical path end in a final slash (if it does not + already do so). + + @return True if a final slash has successfully been appended (and the + resulting URI is still valid), or if the hierarchical path already + ended in a final slash. If the path is not hierarchical, false is + returned. If false is returned, the object is not modified. + */ + bool setFinalSlash(); + + /** Remove a final slash from the hierarchical path. + + @return True if a final slash has successfully been removed (and the + resulting URI is still valid), or if the hierarchical path already did + not end in a final slash. If the path is not hierarchical, false is + returned. If false is returned, the object is not modified. + */ + bool removeFinalSlash(); + + // Query: + + bool HasParam() const { return m_aQuery.isPresent(); } + + OUString GetParam(rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aQuery, DecodeMechanism::NONE, eCharset); } + + inline bool SetParam(std::u16string_view rTheQuery, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + // Fragment: + + bool HasMark() const { return m_aFragment.isPresent(); } + + OUString GetMark(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8) const + { return decode(m_aFragment, eMechanism, eCharset); } + + inline bool SetMark(std::u16string_view rTheFragment, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + // File URLs: + + /** Return the file system path represented by a file URL (ignoring any + fragment part). + + @param eStyle The notation of the returned file system path. + + @param pDelimiter Upon successful return, this parameter can return + the character that is the 'main' delimiter within the returned file + system path (e.g., "/" for Unix, "\" for DOS). This is + especially useful for routines that later try to shorten the returned + file system path at a 'good' position, e.g. to fit it into some + limited display space. + + @return The file system path represented by this file URL. If this + file URL does not represent a file system path according to the + specified notation, or if this is not a file URL at all, an empty + string is returned. + */ + OUString getFSysPath(FSysStyle eStyle, sal_Unicode * pDelimiter = nullptr) + const; + + // Data URLs: + std::unique_ptr<SvMemoryStream> getData() const; + + // Coding: + + enum Part + { + PART_USER_PASSWORD = 0x00001, + PART_FPATH = 0x00008, + PART_AUTHORITY = 0x00010, + PART_REL_SEGMENT_EXTRA = 0x00020, + PART_URIC = 0x00040, + PART_HTTP_PATH = 0x00080, + PART_MESSAGE_ID_PATH = 0x00100, + PART_MAILTO = 0x00200, + PART_PATH_BEFORE_QUERY = 0x00400, + PART_PCHAR = 0x00800, + PART_VISIBLE = 0x01000, + PART_VISIBLE_NONSPECIAL = 0x02000, + PART_UNO_PARAM_VALUE = 0x04000, + PART_UNAMBIGUOUS = 0x08000, + PART_URIC_NO_SLASH = 0x10000, + PART_HTTP_QUERY = 0x20000, //TODO! unused? + }; + + enum class EscapeType + { + NONE, + Octet, + Utf32 + }; + + /** Encode some text as part of a URI. + + @param rText Some text (for its interpretation, see the general + discussion for set-methods). + + @param ePart The part says which characters are 'forbidden' and must + be encoded (replaced by escape sequences). Characters outside the US- + ASCII range are always 'forbidden.' + + @param eMechanism See the general discussion for set-methods. + + @param eCharset See the general discussion for set-methods. + + @return The text, encoded according to the given mechanism and + charset ('forbidden' characters replaced by escape sequences). + */ + static OUString encode( std::u16string_view rText, Part ePart, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8); + + + /** Decode some text. + + @param rText Some (encoded) text. + + @param eMechanism See the general discussion for get-methods. + + @param eCharset See the general discussion for get-methods. + + @return The text, decoded according to the given mechanism and + charset (escape sequences replaced by 'raw' characters). + */ + static inline OUString decode(std::u16string_view rText, + DecodeMechanism eMechanism, + rtl_TextEncoding eCharset + = RTL_TEXTENCODING_UTF8); + + static void appendUCS4Escape(OUStringBuffer & rTheText, sal_uInt32 nUCS4); + + static void appendUCS4(OUStringBuffer & rTheText, sal_uInt32 nUCS4, + EscapeType eEscapeType, Part ePart, + rtl_TextEncoding eCharset, bool bKeepVisibleEscapes); + + static sal_uInt32 getUTF32(sal_Unicode const *& rBegin, + sal_Unicode const * pEnd, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset, + EscapeType & rEscapeType); + + // Specialized helpers: + + static sal_uInt32 scanDomain(sal_Unicode const *& rBegin, + sal_Unicode const * pEnd, + bool bEager = true); + + // OBSOLETE Hierarchical Path: + + OUString GetPartBeforeLastName() const; + + /** Get the last segment in the path. + + @param eMechanism See the general discussion for get-methods. + + @param eCharset See the general discussion for get-methods. + + @return For a hierarchical URL, the last segment (everything after + the last unencoded '/'). Note that this last segment may be empty. If + the URL is not hierarchical, an empty string is returned. + */ + OUString GetLastName(DecodeMechanism eMechanism = DecodeMechanism::ToIUri, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8) + const; + + /** Get the 'extension' of the last segment in the path. + + @return For a hierarchical URL, everything after the first unencoded + '.' in the last segment of the path. Note that this 'extension' may + be empty. If the URL is not hierarchical, or if the last segment does + not contain an unencoded '.', an empty string is returned. + */ + OUString GetFileExtension() const; + + bool Append(std::u16string_view rTheSegment, + EncodeMechanism eMechanism = EncodeMechanism::WasEncoded, + rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + + void CutLastName(); + + // OBSOLETE File URLs: + + OUString PathToFileName() const; + + OUString GetFull() const; + + OUString GetPath() const; + + void SetBase(std::u16string_view rTheBase); + + OUString GetBase() const; + + void SetExtension(std::u16string_view rTheExtension); + + OUString CutExtension(); + + static bool IsCaseSensitive() { return true; } + + void changeScheme(INetProtocol eTargetScheme); + + // INetProtocol::Macro, INetProtocol::Uno, INetProtocol::Slot, + // vnd.sun.star.script, etc. All the types of URLs which shouldn't + // be accepted from an outside controlled source + bool IsExoticProtocol() const; + +private: + // General Structure: + + class SAL_DLLPRIVATE SubString + { + sal_Int32 m_nBegin; + sal_Int32 m_nLength; + + public: + explicit SubString(sal_Int32 nTheBegin = -1, + sal_Int32 nTheLength = 0): + m_nBegin(nTheBegin), m_nLength(nTheLength) {} + + bool isPresent() const { return m_nBegin != -1; } + + bool isEmpty() const { return m_nLength == 0; } + + sal_Int32 getBegin() const { return m_nBegin; } + + sal_Int32 getLength() const { return m_nLength; } + + sal_Int32 getEnd() const { return m_nBegin + m_nLength; } + + sal_Int32 clear(); + + sal_Int32 set(OUStringBuffer & rString, + std::u16string_view rSubString, + sal_Int32 nTheBegin); + + sal_Int32 set(OUString & rString, + std::u16string_view rSubString); + + sal_Int32 set(OUStringBuffer & rString, + std::u16string_view rSubString); + + inline void operator +=(sal_Int32 nDelta); + + int compare(SubString const & rOther, + OUStringBuffer const & rThisString, + OUStringBuffer const & rOtherString) const; + }; + + OUStringBuffer m_aAbsURIRef; + SubString m_aScheme; + SubString m_aUser; + SubString m_aAuth; + SubString m_aHost; + SubString m_aPort; + SubString m_aPath; + SubString m_aQuery; + SubString m_aFragment; + INetProtocol m_eScheme; + INetProtocol m_eSmartScheme; + + TOOLS_DLLPRIVATE void setInvalid(); + + bool setAbsURIRef( + std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart, + FSysStyle eStyle); + + // Relative URLs: + + bool convertRelToAbs( + OUString const & rTheRelURIRef, + INetURLObject & rTheAbsURIRef, bool & rWasAbsolute, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset, + bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs, + FSysStyle eStyle) const; + + bool convertAbsToRel( + OUString const & rTheAbsURIRef, + OUString & rTheRelURIRef, EncodeMechanism eEncodeMechanism, + DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset, + FSysStyle eStyle) const; + + // External URLs: + + static bool convertIntToExt( + std::u16string_view rTheIntURIRef, + OUString & rTheExtURIRef, DecodeMechanism eDecodeMechanism, + rtl_TextEncoding eCharset); + + static bool convertExtToInt( + std::u16string_view rTheExtURIRef, + OUString & rTheIntURIRef, DecodeMechanism eDecodeMechanism, + rtl_TextEncoding eCharset); + + // Scheme: + + struct PrefixInfo; + + TOOLS_DLLPRIVATE static inline SchemeInfo const & getSchemeInfo( + INetProtocol eTheScheme); + + TOOLS_DLLPRIVATE inline SchemeInfo const & getSchemeInfo() const; + + TOOLS_DLLPRIVATE static PrefixInfo const * getPrefix( + sal_Unicode const *& rBegin, sal_Unicode const * pEnd); + + // Authority: + + TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const; + + TOOLS_DLLPRIVATE SubString getAuthority() const; + + // User Info: + + bool setUser( + std::u16string_view rTheUser, + rtl_TextEncoding eCharset); + + bool clearPassword(); + + bool setPassword( + std::u16string_view rThePassword, + rtl_TextEncoding eCharset); + + // Host and Port: + + TOOLS_DLLPRIVATE static bool parseHost( + sal_Unicode const *& rBegin, sal_Unicode const * pEnd, + OUStringBuffer* pCanonic); + + TOOLS_DLLPRIVATE static bool parseHostOrNetBiosName( + sal_Unicode const * pBegin, sal_Unicode const * pEnd, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset, + bool bNetBiosName, OUStringBuffer* pCanonic); + + bool setHost( + std::u16string_view rTheHost, + rtl_TextEncoding eCharset); + + // Path: + + TOOLS_DLLPRIVATE static bool parsePath( + INetProtocol eScheme, sal_Unicode const ** pBegin, + sal_Unicode const * pEnd, EncodeMechanism eMechanism, + rtl_TextEncoding eCharset, bool bSkippedInitialSlash, + sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter, + sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter, + OUStringBuffer &rSynPath); + + bool setPath( + std::u16string_view rThePath, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset); + + // Hierarchical Path: + + TOOLS_DLLPRIVATE bool checkHierarchical() const; + + TOOLS_DLLPRIVATE SubString getSegment( + sal_Int32 nIndex, bool bIgnoreFinalSlash) const; + + // Query: + + void clearQuery(); + + bool setQuery( + std::u16string_view rTheQuery, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset); + + // Fragment: + + bool clearFragment(); + + bool setFragment( + std::u16string_view rTheMark, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset); + + // FILE URLs: + + TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const; + + // Coding: + + TOOLS_DLLPRIVATE static inline void appendEscape( + OUStringBuffer & rTheText, sal_uInt32 nOctet); + + static void encodeText( + OUStringBuffer& rOutputBuffer, + sal_Unicode const * pBegin, sal_Unicode const * pEnd, + Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, + bool bKeepVisibleEscapes); + + static inline void encodeText( + OUStringBuffer& rOutputBuffer, + std::u16string_view rTheText, Part ePart, + EncodeMechanism eMechanism, rtl_TextEncoding eCharset, + bool bKeepVisibleEscapes); + + static OUString decode( + sal_Unicode const * pBegin, sal_Unicode const * pEnd, + DecodeMechanism, rtl_TextEncoding eCharset); + + inline OUString decode( + SubString const & rSubString, + DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const; + + // Specialized helpers: + + TOOLS_DLLPRIVATE static bool scanIPv6reference( + sal_Unicode const *& rBegin, sal_Unicode const * pEnd); +}; + +// static +inline void INetURLObject::encodeText( OUStringBuffer& rOutputBuffer, + std::u16string_view rTheText, + Part ePart, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset, + bool bKeepVisibleEscapes) +{ + encodeText(rOutputBuffer, + rTheText.data(), + rTheText.data() + rTheText.size(), ePart, + eMechanism, eCharset, bKeepVisibleEscapes); +} + +inline OUString INetURLObject::decode(SubString const & rSubString, + DecodeMechanism eMechanism, + rtl_TextEncoding eCharset) const +{ + return rSubString.isPresent() ? + decode(m_aAbsURIRef.getStr() + rSubString.getBegin(), + m_aAbsURIRef.getStr() + rSubString.getEnd(), + eMechanism, eCharset) : + OUString(); +} + +inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset): + m_aAbsURIRef(rTheAbsURIRef.size() * 2), m_eScheme(INetProtocol::NotValid), m_eSmartScheme(INetProtocol::Http) +{ + setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false, + FSysStyle(0)); +} + +inline bool INetURLObject::SetURL(std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset) +{ + return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, false, + FSysStyle(0)); +} + +inline INetURLObject::INetURLObject(std::u16string_view rTheAbsURIRef, + INetProtocol eTheSmartScheme, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset, + FSysStyle eStyle): + m_eScheme(INetProtocol::NotValid), m_eSmartScheme(eTheSmartScheme) +{ + setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, eStyle); +} + +inline bool INetURLObject::SetSmartURL(std::u16string_view rTheAbsURIRef, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset, + FSysStyle eStyle) +{ + return setAbsURIRef(rTheAbsURIRef, eMechanism, eCharset, true, + eStyle); +} + +inline INetURLObject +INetURLObject::smartRel2Abs(OUString const & rTheRelURIRef, + bool & rWasAbsolute, + bool bIgnoreFragment, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset, + bool bRelativeNonURIs, + FSysStyle eStyle) const +{ + INetURLObject aTheAbsURIRef; + convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, rWasAbsolute, + eMechanism, eCharset, bIgnoreFragment, true, + bRelativeNonURIs, eStyle); + return aTheAbsURIRef; +} + +inline bool INetURLObject::GetNewAbsURL(OUString const & rTheRelURIRef, + INetURLObject * pTheAbsURIRef) + const +{ + INetURLObject aTheAbsURIRef; + bool bWasAbsolute; + if (!convertRelToAbs(rTheRelURIRef, aTheAbsURIRef, bWasAbsolute, + EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false/*bIgnoreFragment*/, false, false, + FSysStyle::Detect)) + return false; + if (pTheAbsURIRef) + *pTheAbsURIRef = aTheAbsURIRef; + return true; +} + +// static +inline OUString INetURLObject::GetRelURL(std::u16string_view rTheBaseURIRef, + OUString const & rTheAbsURIRef, + EncodeMechanism eEncodeMechanism, + DecodeMechanism eDecodeMechanism, + rtl_TextEncoding eCharset, + FSysStyle eStyle) +{ + OUString aTheRelURIRef; + INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset). + convertAbsToRel(rTheAbsURIRef, aTheRelURIRef, eEncodeMechanism, + eDecodeMechanism, eCharset, eStyle); + return aTheRelURIRef; +} + +// static +inline bool INetURLObject::translateToExternal(std::u16string_view + rTheIntURIRef, + OUString & rTheExtURIRef, + DecodeMechanism + eDecodeMechanism, + rtl_TextEncoding eCharset) +{ + return convertIntToExt(rTheIntURIRef, rTheExtURIRef, + eDecodeMechanism, eCharset); +} + +// static +inline bool INetURLObject::translateToInternal(std::u16string_view + rTheExtURIRef, + OUString & rTheIntURIRef, + DecodeMechanism + eDecodeMechanism, + rtl_TextEncoding eCharset) +{ + return convertExtToInt(rTheExtURIRef, rTheIntURIRef, + eDecodeMechanism, eCharset); +} + +inline bool INetURLObject::SetPass(std::u16string_view rThePassword) +{ + return rThePassword.empty() ? + clearPassword() : + setPassword(rThePassword, RTL_TEXTENCODING_UTF8); +} + +inline bool INetURLObject::SetParam(std::u16string_view rTheQuery, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset) +{ + if (rTheQuery.empty()) + { + clearQuery(); + return false; + } + return setQuery(rTheQuery, eMechanism, eCharset); +} + +inline bool INetURLObject::SetMark(std::u16string_view rTheFragment, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset) +{ + return rTheFragment.empty() ? + clearFragment() : + setFragment(rTheFragment, eMechanism, eCharset); +} + +// static +inline OUString INetURLObject::encode(std::u16string_view rText, Part ePart, + EncodeMechanism eMechanism, + rtl_TextEncoding eCharset) +{ + OUStringBuffer aBuf; + encodeText(aBuf, rText, ePart, eMechanism, eCharset, false); + return aBuf.makeStringAndClear(); +} + +// static +inline OUString INetURLObject::decode(std::u16string_view rText, + DecodeMechanism eMechanism, + rtl_TextEncoding eCharset) +{ + return decode(rText.data(), rText.data() + rText.size(), + eMechanism, eCharset); +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |