summary refs log tree commit diff stats
path: root/upstream/fedora-40/man1/perlhacktut.1
diff options
context:
space:
mode:
Diffstat (limited to 'upstream/fedora-40/man1/perlhacktut.1')
-rw-r--r-- upstream/fedora-40/man1/perlhacktut.1 264
1 files changed, 264 insertions, 0 deletions
diff --git a/upstream/fedora-40/man1/perlhacktut.1 b/upstream/fedora-40/man1/perlhacktut.1
new file mode 100644
index 00000000..2ea619a5
--- /dev/null
+++ b/upstream/fedora-40/man1/perlhacktut.1
@@ -0,0 +1,264 @@
+.\" -*- mode: troff; coding: utf-8 -*-
+.\" Automatically generated by Pod::Man 5.01 (Pod::Simple 3.43)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>.
+.ie n \{\
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.nr rF 0
+.if \n(.g .if rF .nr rF 1
+.if (\n(rF:(\n(.g==0)) \{\
+. if \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+. \}
+.\}
+.rr rF
+.\" ========================================================================
+.\"
+.IX Title "PERLHACKTUT 1"
+.TH PERLHACKTUT 1 2024-01-25 "perl v5.38.2" "Perl Programmers Reference Guide"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH NAME
+perlhacktut \- Walk through the creation of a simple C code patch
+.SH DESCRIPTION
+.IX Header "DESCRIPTION"
+This document takes you through a simple patch example.
+.PP
+If you haven't read perlhack yet, go do that first! You might also
+want to read through perlsource too.
+.PP
+Once you're done here, check out perlhacktips next.
+.SH "EXAMPLE OF A SIMPLE PATCH"
+.IX Header "EXAMPLE OF A SIMPLE PATCH"
+Let's take a simple patch from start to finish.
+.PP
+Here's something Larry suggested: if a \f(CW\*(C`U\*(C'\fR is the first active format
+during a \f(CW\*(C`pack\*(C'\fR, (for example, \f(CW\*(C`pack "U3C8", @stuff\*(C'\fR) then the
+resulting string should be treated as UTF\-8 encoded.
+.PP
+If you are working with a git clone of the Perl repository, you will
+want to create a branch for your changes. This will make creating a
+proper patch much simpler. See perlgit for details on how to do
+this.
+.SS "Writing the patch"
+.IX Subsection "Writing the patch"
+How do we prepare to fix this up? First we locate the code in question
+\&\- the \f(CW\*(C`pack\*(C'\fR happens at runtime, so it's going to be in one of the
+\&\fIpp\fR files. Sure enough, \f(CW\*(C`pp_pack\*(C'\fR is in \fIpp.c\fR. Since we're going
+to be altering this file, let's copy it to \fIpp.c~\fR.
+.PP
+[Well, it was in \fIpp.c\fR when this tutorial was written. It has now
+been split off with \f(CW\*(C`pp_unpack\*(C'\fR to its own file, \fIpp_pack.c\fR]
+.PP
+Now let's look over \f(CW\*(C`pp_pack\*(C'\fR: we take a pattern into \f(CW\*(C`pat\*(C'\fR, and then
+loop over the pattern, taking each format character in turn into
+\&\f(CW\*(C`datumtype\*(C'\fR. Then for each possible format character, we swallow up
+the other arguments in the pattern (a field width, an asterisk, and so
+on) and convert the next chunk of input into the specified format, adding
+it onto the output SV \f(CW\*(C`cat\*(C'\fR.
+.PP
+How do we know if the \f(CW\*(C`U\*(C'\fR is the first format in the \f(CW\*(C`pat\*(C'\fR? Well, if
+we have a pointer to the start of \f(CW\*(C`pat\*(C'\fR then, if we see a \f(CW\*(C`U\*(C'\fR we can
+test whether we're still at the start of the string. So, here's where
+\&\f(CW\*(C`pat\*(C'\fR is set up:
+.PP
+.Vb 6
+\& STRLEN fromlen;
+\& char *pat = SvPVx(*++MARK, fromlen);
+\& char *patend = pat + fromlen;
+\& I32 len;
+\& I32 datumtype;
+\& SV *fromstr;
+.Ve
+.PP
+We'll have another string pointer in there:
+.PP
+.Vb 7
+\& STRLEN fromlen;
+\& char *pat = SvPVx(*++MARK, fromlen);
+\& char *patend = pat + fromlen;
+\& + char *patcopy;
+\& I32 len;
+\& I32 datumtype;
+\& SV *fromstr;
+.Ve
+.PP
+And just before we start the loop, we'll set \f(CW\*(C`patcopy\*(C'\fR to be the start
+of \f(CW\*(C`pat\*(C'\fR:
+.PP
+.Vb 5
+\& items = SP \- MARK;
+\& MARK++;
+\& SvPVCLEAR(cat);
+\& + patcopy = pat;
+\& while (pat < patend) {
+.Ve
+.PP
+Now if we see a \f(CW\*(C`U\*(C'\fR which was at the start of the string, we turn on
+the \f(CW\*(C`UTF8\*(C'\fR flag for the output SV, \f(CW\*(C`cat\*(C'\fR:
+.PP
+.Vb 5
+\& + if (datumtype == \*(AqU\*(Aq && pat==patcopy+1)
+\& + SvUTF8_on(cat);
+\& if (datumtype == \*(Aq#\*(Aq) {
+\& while (pat < patend && *pat != \*(Aq\en\*(Aq)
+\& pat++;
+.Ve
+.PP
+Remember that it has to be \f(CW\*(C`patcopy+1\*(C'\fR because the first character of
+the string is the \f(CW\*(C`U\*(C'\fR which has been swallowed into \f(CW\*(C`datumtype\*(C'\fR!
+.PP
+Oops, we forgot one thing: what if there are spaces at the start of the
+pattern? \f(CW\*(C`pack(" U*", @stuff)\*(C'\fR will have \f(CW\*(C`U\*(C'\fR as the first active
+character, even though it's not the first thing in the pattern. In this
+case, we have to advance \f(CW\*(C`patcopy\*(C'\fR along with \f(CW\*(C`pat\*(C'\fR when we see
+spaces:
+.PP
+.Vb 2
+\& if (isSPACE(datumtype))
+\& continue;
+.Ve
+.PP
+needs to become
+.PP
+.Vb 4
+\& if (isSPACE(datumtype)) {
+\& patcopy++;
+\& continue;
+\& }
+.Ve
+.PP
+OK. That's the C part done. Now we must do two additional things before
+this patch is ready to go: we've changed the behaviour of Perl, and so
+we must document that change. We must also provide some more regression
+tests to make sure our patch works and doesn't create a bug somewhere
+else along the line.
+.SS "Testing the patch"
+.IX Subsection "Testing the patch"
+The regression tests for each operator live in \fIt/op/\fR, and so we make
+a copy of \fIt/op/pack.t\fR to \fIt/op/pack.t~\fR. Now we can add our tests
+to the end. First, we'll test that the \f(CW\*(C`U\*(C'\fR does indeed create Unicode
+strings.
+.PP
+t/op/pack.t has a sensible \fBok()\fR function, but if it didn't we could use
+the one from t/test.pl.
+.PP
+.Vb 2
+\& require \*(Aq./test.pl\*(Aq;
+\& plan( tests => 159 );
+.Ve
+.PP
+so instead of this:
+.PP
+.Vb 3
+\& print \*(Aqnot \*(Aq unless "1.20.300.4000" eq sprintf "%vd",
+\& pack("U*",1,20,300,4000);
+\& print "ok $test\en"; $test++;
+.Ve
+.PP
+we can write the more sensible version below (see Test::More for a full
+explanation of \fBis()\fR and other testing functions):
+.PP
+.Vb 2
+\& is( "1.20.300.4000", sprintf "%vd", pack("U*",1,20,300,4000),
+\& "U* produces Unicode" );
+.Ve
+.PP
+Now we'll test that we got that space-at-the-beginning business right:
+.PP
+.Vb 2
+\& is( "1.20.300.4000", sprintf "%vd", pack(" U*",1,20,300,4000),
+\& " with spaces at the beginning" );
+.Ve
+.PP
+And finally we'll test that we don't make Unicode strings if \f(CW\*(C`U\*(C'\fR is
+\&\fBnot\fR the first active format:
+.PP
+.Vb 2
+\& isnt( v1.20.300.4000, sprintf "%vd", pack("C0U*",1,20,300,4000),
+\& "U* not first isn\*(Aqt Unicode" );
+.Ve
+.PP
+Mustn't forget to change the number of tests which appears at the top,
+or else the automated tester will get confused. This will either look
+like this:
+.PP
+.Vb 1
+\& print "1..156\en";
+.Ve
+.PP
+or this:
+.PP
+.Vb 1
+\& plan( tests => 156 );
+.Ve
+.PP
+We now compile up Perl, and run it through the test suite. Our new
+tests pass, hooray!
+.SS "Documenting the patch"
+.IX Subsection "Documenting the patch"
+Finally, the documentation. The job is never done until the paperwork
+is over, so let's describe the change we've just made. The relevant
+place is \fIpod/perlfunc.pod\fR; again, we make a copy, and then we'll
+insert this text in the description of \f(CW\*(C`pack\*(C'\fR:
+.PP
+.Vb 1
+\& =item *
+\&
+\& If the pattern begins with a C<U>, the resulting string will be treated
+\& as UTF\-8\-encoded Unicode. You can force UTF\-8 encoding on in a string
+\& with an initial C<U0>, and the bytes that follow will be interpreted as
+\& Unicode characters. If you don\*(Aqt want this to happen, you can begin
+\& your pattern with C<C0> (or anything else) to force Perl not to UTF\-8
+\& encode your string, and then follow this with a C<U*> somewhere in your
+\& pattern.
+.Ve
+.SS Submit
+.IX Subsection "Submit"
+See perlhack for details on how to submit this patch.
+.SH AUTHOR
+.IX Header "AUTHOR"
+This document was originally written by Nathan Torkington, and is
+maintained by the perl5\-porters mailing list.