summaryrefslogtreecommitdiffstats
path: root/upstream/mageia-cauldron/man3pm/bytes.3pm
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 19:43:11 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 19:43:11 +0000
commitfc22b3d6507c6745911b9dfcc68f1e665ae13dbc (patch)
treece1e3bce06471410239a6f41282e328770aa404a /upstream/mageia-cauldron/man3pm/bytes.3pm
parentInitial commit. (diff)
downloadmanpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.tar.xz
manpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.zip
Adding upstream version 4.22.0.upstream/4.22.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'upstream/mageia-cauldron/man3pm/bytes.3pm')
-rw-r--r--upstream/mageia-cauldron/man3pm/bytes.3pm156
1 files changed, 156 insertions, 0 deletions
diff --git a/upstream/mageia-cauldron/man3pm/bytes.3pm b/upstream/mageia-cauldron/man3pm/bytes.3pm
new file mode 100644
index 00000000..39c09d6c
--- /dev/null
+++ b/upstream/mageia-cauldron/man3pm/bytes.3pm
@@ -0,0 +1,156 @@
+.\" -*- mode: troff; coding: utf-8 -*-
+.\" Automatically generated by Pod::Man 5.01 (Pod::Simple 3.43)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>.
+.ie n \{\
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.nr rF 0
+.if \n(.g .if rF .nr rF 1
+.if (\n(rF:(\n(.g==0)) \{\
+. if \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+. \}
+.\}
+.rr rF
+.\" ========================================================================
+.\"
+.IX Title "bytes 3pm"
+.TH bytes 3pm 2023-11-28 "perl v5.38.2" "Perl Programmers Reference Guide"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH NAME
+bytes \- Perl pragma to expose the individual bytes of characters
+.SH NOTICE
+.IX Header "NOTICE"
+Because the bytes pragma breaks encapsulation (i.e. it exposes the innards of
+how the perl executable currently happens to store a string), the byte values
+that result are in an unspecified encoding.
+.PP
+\&\fBUse of this module for anything other than debugging purposes is
+strongly discouraged.\fR If you feel that the functions herein
+might be useful for your application, this possibly indicates a
+mismatch between your mental model of Perl Unicode and the current
+reality. In that case, you may wish to read some of the perl Unicode
+documentation: perluniintro, perlunitut, perlunifaq and
+perlunicode.
+.SH SYNOPSIS
+.IX Header "SYNOPSIS"
+.Vb 8
+\& use bytes;
+\& ... chr(...); # or bytes::chr
+\& ... index(...); # or bytes::index
+\& ... length(...); # or bytes::length
+\& ... ord(...); # or bytes::ord
+\& ... rindex(...); # or bytes::rindex
+\& ... substr(...); # or bytes::substr
+\& no bytes;
+.Ve
+.SH DESCRIPTION
+.IX Header "DESCRIPTION"
+Perl's characters are stored internally as sequences of one or more bytes.
+This pragma allows for the examination of the individual bytes that together
+comprise a character.
+.PP
+Originally the pragma was designed for the loftier goal of helping incorporate
+Unicode into Perl, but the approach that used it was found to be defective,
+and the one remaining legitimate use is for debugging when you need to
+non-destructively examine characters' individual bytes. Just insert this
+pragma temporarily, and remove it after the debugging is finished.
+.PP
+The original usage can be accomplished by explicit (rather than this pragma's
+implicit) encoding using the Encode module:
+.PP
+.Vb 1
+\& use Encode qw/encode/;
+\&
+\& my $utf8_byte_string = encode "UTF8", $string;
+\& my $latin1_byte_string = encode "Latin1", $string;
+.Ve
+.PP
+Or, if performance is needed and you are only interested in the UTF\-8
+representation:
+.PP
+.Vb 1
+\& utf8::encode(my $utf8_byte_string = $string);
+.Ve
+.PP
+\&\f(CW\*(C`no bytes\*(C'\fR can be used to reverse the effect of \f(CW\*(C`use bytes\*(C'\fR within the
+current lexical scope.
+.PP
+As an example, when Perl sees \f(CW\*(C`$x = chr(400)\*(C'\fR, it encodes the character
+in UTF\-8 and stores it in \f(CW$x\fR. Then it is marked as character data, so,
+for instance, \f(CW\*(C`length $x\*(C'\fR returns \f(CW1\fR. However, in the scope of the
+\&\f(CW\*(C`bytes\*(C'\fR pragma, \f(CW$x\fR is treated as a series of bytes \- the bytes that make
+up the UTF\-8 encoding \- and \f(CW\*(C`length $x\*(C'\fR returns \f(CW2\fR:
+.PP
+.Vb 9
+\& $x = chr(400);
+\& print "Length is ", length $x, "\en"; # "Length is 1"
+\& printf "Contents are %vd\en", $x; # "Contents are 400"
+\& {
+\& use bytes; # or "require bytes; bytes::length()"
+\& print "Length is ", length $x, "\en"; # "Length is 2"
+\& printf "Contents are %vd\en", $x; # "Contents are 198.144 (on
+\& # ASCII platforms)"
+\& }
+.Ve
+.PP
+\&\f(CWchr()\fR, \f(CWord()\fR, \f(CWsubstr()\fR, \f(CWindex()\fR and \f(CWrindex()\fR behave similarly.
+.PP
+For more on the implications, see perluniintro and perlunicode.
+.PP
+\&\f(CWbytes::length()\fR is admittedly handy if you need to know the
+\&\fBbyte length\fR of a Perl scalar. But a more modern way is:
+.PP
+.Vb 2
+\& use Encode \*(Aqencode\*(Aq;
+\& length(encode(\*(AqUTF\-8\*(Aq, $scalar))
+.Ve
+.SH LIMITATIONS
+.IX Header "LIMITATIONS"
+\&\f(CWbytes::substr()\fR does not work as an \fIlvalue\fR (that is, it cannot be assigned to).
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+perluniintro, perlunicode, utf8, Encode