summaryrefslogtreecommitdiffstats
path: root/phaul
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 17:08:40 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 17:08:40 +0000
commit: db4355c97c4d09cabe0ff0d522acff7b672d18df (patch)
tree: 078ed2ef0cd31b9992ba7a9e94ad05bb5e4b6077 /phaul
parent: Initial commit. (diff)
download: golang-github-checkpoint-restore-go-criu-db4355c97c4d09cabe0ff0d522acff7b672d18df.tar.xz
golang-github-checkpoint-restore-go-criu-db4355c97c4d09cabe0ff0d522acff7b672d18df.zip
Adding upstream version 6.3.0+ds1. (refs: upstream/6.3.0+ds1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'phaul')
-rw-r--r-- phaul/api.go 45
-rw-r--r-- phaul/client.go 141
-rw-r--r-- phaul/images.go 42
-rw-r--r-- phaul/server.go 108
4 files changed, 336 insertions, 0 deletions
diff --git a/phaul/api.go b/phaul/api.go
new file mode 100644
index 0000000..f675f80
--- /dev/null
+++ b/phaul/api.go
@@ -0,0 +1,45 @@
+package phaul
+
+import (
+ "github.com/checkpoint-restore/go-criu/v6"
+)
+
+// Config carries the migration parameters that are passed around; both
+// Client and Server keep a copy of it.
+type Config struct {
+	// Pid is what we migrate.
+	Pid int
+	// Memfd is the file descriptor via which criu can transfer memory pages.
+	Memfd int
+	// Wdir is the directory where phaul can put images and other stuff.
+	Wdir string
+}
+
+// Remote is the RPC surface between the phaul Client and the phaul Server.
+// Whenever the client calls a method on its Remote, the corresponding
+// method should be called on the Server object.
+type Remote interface {
+	// StartIter is called by the client before it starts a dump iteration.
+	StartIter() error
+	// StopIter is called by the client after the dump of an iteration returned.
+	StopIter() error
+}
+
+// Local is the interface to local classes. The client calls it when it
+// needs something done on the source node.
+//
+// DumpCopyRestore is called on the client side when the pre-iterations are
+// over and it is time to do the full dump, copy the images and restore them
+// on the server side. For the whole time this method runs, the victim tree
+// is frozen on the client. Returning nil kills the tree; returning an error
+// unfreezes it and lets it resume. The criu argument points to the
+// criu.Criu object created by the client, on which the implementation may
+// call Dump(). The opts passed to that call are required to:
+//
+//   - set Ps.Fd to c.Memfd
+//   - set ParentImg to lastClientImagesPath
+//   - set TrackMem to true
+type Local interface {
+	DumpCopyRestore(criu *criu.Criu, c Config, lastClientImagesPath string) error
+}
diff --git a/phaul/client.go b/phaul/client.go
new file mode 100644
index 0000000..fd5e877
--- /dev/null
+++ b/phaul/client.go
@@ -0,0 +1,141 @@
+package phaul
+
+import (
+ "fmt"
+ "path/filepath"
+
+ "github.com/checkpoint-restore/go-criu/v6"
+ "github.com/checkpoint-restore/go-criu/v6/crit"
+ "github.com/checkpoint-restore/go-criu/v6/crit/images"
+ "github.com/checkpoint-restore/go-criu/v6/rpc"
+ "google.golang.org/protobuf/proto"
+)
+
+// Thresholds consulted by isLastIter to decide when pre-dumping stops.
+const (
+	// minPagesWritten: an iteration that wrote fewer pages than this is
+	// treated as the last one.
+	minPagesWritten uint64 = 64
+	// maxIters is the iteration count at which pre-dumping stops regardless
+	// of the statistics.
+	maxIters int = 8
+	// maxGrowDelta: an iteration that wrote at least this many pages more
+	// than the previous one is treated as the last one.
+	maxGrowDelta int64 = 32
+)
+
+// Client is the source-node side of a migration; see Migrate.
+type Client struct {
+	// local performs the final dump, copy and restore step.
+	local Local
+	// remote is notified at the start and at the end of every iteration.
+	remote Remote
+	// cfg holds the pid, memory fd and working directory of the migration.
+	cfg Config
+}
+
+// MakePhaulClient is the main entry point on the client side. The caller
+// creates the client object by handing over the Local and Remote
+// implementations together with the Config; see the comments on those types
+// for what each one is expected to provide.
+//
+// Afterwards call Migrate() on the returned client and enjoy :)
+//
+// The returned error is always nil.
+func MakePhaulClient(l Local, r Remote, c Config) (*Client, error) {
+	client := &Client{
+		local:  l,
+		remote: r,
+		cfg:    c,
+	}
+	return client, nil
+}
+
+// isLastIter reports whether pre-dumping should stop after iteration iter.
+// It compares the statistics of the iteration that just finished (stats)
+// with those of the one before it (prevStats) and prints the reason
+// whenever it decides to stop. The checks are made in this order:
+//
+//   - iter has reached maxIters;
+//   - fewer than minPagesWritten pages were written;
+//   - the number of written pages grew by maxGrowDelta or more.
+func isLastIter(iter int, stats *images.DumpStatsEntry, prevStats *images.DumpStatsEntry) bool {
+	if iter >= maxIters {
+		fmt.Printf("`- max iters reached\n")
+		return true
+	}
+
+	written := stats.GetPagesWritten()
+	growth := int64(written) - int64(prevStats.GetPagesWritten())
+
+	switch {
+	case written < minPagesWritten:
+		fmt.Printf("`- tiny pre-dump (%d) reached\n", int(written))
+	case growth >= maxGrowDelta:
+		fmt.Printf("`- grow iter (%d) reached\n", int(growth))
+	default:
+		return false
+	}
+	return true
+}
+
+// Migrate runs the whole migration from the client side.
+//
+// It repeats pre-dump iterations until isLastIter says to stop. Each
+// iteration is bracketed by StartIter and StopIter on the remote and is
+// written to a fresh images directory under cfg.Wdir. Afterwards it
+// brackets a call to local.DumpCopyRestore with one more
+// StartIter/StopIter pair.
+//
+// The first error encountered is returned. In the final stage StopIter is
+// still called when DumpCopyRestore fails, and the DumpCopyRestore error
+// takes precedence over the StopIter one. A stats-dump image that is empty
+// or holds an unexpected message type is reported as an error.
+func (pc *Client) Migrate() error {
+	cr := criu.MakeCriu()
+	psi := rpc.CriuPageServerInfo{
+		Fd: proto.Int32(int32(pc.cfg.Memfd)),
+	}
+	opts := &rpc.CriuOpts{
+		Pid:      proto.Int32(int32(pc.cfg.Pid)),
+		LogLevel: proto.Int32(4),
+		LogFile:  proto.String("pre-dump.log"),
+		Ps:       &psi,
+	}
+
+	err := cr.Prepare()
+	if err != nil {
+		return err
+	}
+
+	defer cr.Cleanup()
+
+	imgs, err := preparePhaulImages(pc.cfg.Wdir)
+	if err != nil {
+		return err
+	}
+	prevStats := &images.DumpStatsEntry{}
+	iter := 0
+
+	for {
+		err = pc.remote.StartIter()
+		if err != nil {
+			return err
+		}
+
+		// Remember the previous directory before openNextDir advances the
+		// cursor; it becomes the parent of this iteration's images.
+		prevP := imgs.lastImagesDir()
+		imgDir, err := imgs.openNextDir()
+		if err != nil {
+			return err
+		}
+
+		opts.ImagesDirFd = proto.Int32(int32(imgDir.Fd()))
+		if prevP != "" {
+			opts.ParentImg = proto.String(prevP)
+		}
+
+		err = cr.PreDump(opts, nil)
+		// The directory handle is only needed for the duration of the
+		// pre-dump; its name is still used below.
+		imgDir.Close()
+		if err != nil {
+			return err
+		}
+
+		iter++
+
+		err = pc.remote.StopIter()
+		if err != nil {
+			return err
+		}
+
+		// Get dump statistics with crit
+		c := crit.New(filepath.Join(imgDir.Name(), "stats-dump"), "", "", false, false)
+		statsImg, err := c.Decode()
+		if err != nil {
+			return err
+		}
+		// Validate the decoded image instead of panicking on an empty or
+		// unexpected one.
+		if len(statsImg.Entries) == 0 {
+			return fmt.Errorf("stats-dump image in %s has no entries", imgDir.Name())
+		}
+		statsEntry, ok := statsImg.Entries[0].Message.(*images.StatsEntry)
+		if !ok {
+			return fmt.Errorf("stats-dump image in %s holds unexpected message type %T",
+				imgDir.Name(), statsImg.Entries[0].Message)
+		}
+		stats := statsEntry.GetDump()
+
+		if isLastIter(iter, stats, prevStats) {
+			break
+		}
+
+		prevStats = stats
+	}
+
+	err = pc.remote.StartIter()
+	if err == nil {
+		prevP := imgs.lastImagesDir()
+		err = pc.local.DumpCopyRestore(cr, pc.cfg, prevP)
+		// Always stop the remote iteration, but keep the earlier error.
+		err2 := pc.remote.StopIter()
+		if err == nil {
+			err = err2
+		}
+	}
+
+	return err
+}
diff --git a/phaul/images.go b/phaul/images.go
new file mode 100644
index 0000000..f355a86
--- /dev/null
+++ b/phaul/images.go
@@ -0,0 +1,42 @@
+package phaul
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+)
+
+// image tracks the numbered per-iteration images directories kept under a
+// single working directory.
+type image struct {
+	// cursor is the index the next directory will get; it is 0 until the
+	// first directory has been created.
+	cursor int
+	// dir is the working directory the numbered directories live in.
+	dir string
+}
+
+// preparePhaulImages returns an image tracker rooted at wdir with its
+// cursor at zero. It creates nothing on disk.
+//
+//nolint:unparam // suppress "error is always nil" warning
+func preparePhaulImages(wdir string) (*image, error) {
+	imgs := &image{dir: wdir}
+	return imgs, nil
+}
+
+// getPath returns the path of the images directory with index idx, that is
+// "<dir>/<idx>".
+//
+// The working directory is passed as an argument rather than spliced into
+// the format string, so a '%' in the directory name is kept verbatim
+// instead of being interpreted as a formatting verb.
+func (i *image) getPath(idx int) string {
+	return fmt.Sprintf("%s/%d", i.dir, idx)
+}
+
+// openNextDir creates the directory for the current cursor position with
+// mode 0o700, advances the cursor and returns the directory opened as a
+// file. If the directory cannot be created the cursor is left untouched;
+// if only the open fails the cursor has already been advanced.
+func (i *image) openNextDir() (*os.File, error) {
+	next := i.getPath(i.cursor)
+	if err := os.Mkdir(next, 0o700); err != nil {
+		return nil, err
+	}
+
+	i.cursor++
+	return os.Open(next)
+}
+
+func (i *image) lastImagesDir() string {
+ var ret string
+ if i.cursor == 0 {
+ ret = ""
+ } else {
+ ret, _ = filepath.Abs(i.getPath(i.cursor - 1))
+ }
+ return ret
+}
diff --git a/phaul/server.go b/phaul/server.go
new file mode 100644
index 0000000..da17756
--- /dev/null
+++ b/phaul/server.go
@@ -0,0 +1,108 @@
+package phaul
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "github.com/checkpoint-restore/go-criu/v6"
+ "github.com/checkpoint-restore/go-criu/v6/rpc"
+ "golang.org/x/sys/unix"
+ "google.golang.org/protobuf/proto"
+)
+
+// Server is the receiving side of a migration; its StartIter and StopIter
+// methods match the Remote interface.
+type Server struct {
+	cfg     Config      // settings handed to MakePhaulServer
+	imgs    *image      // per-iteration images directories under cfg.Wdir
+	cr      *criu.Criu  // criu handle used to start page servers
+	process *os.Process // page server started by the latest StartIter
+}
+
+// MakePhaulServer is the main entry point on the server side. Make the
+// server with the Config and call its Remote methods (StartIter, StopIter)
+// upon client requests.
+func MakePhaulServer(c Config) (*Server, error) {
+	imgs, err := preparePhaulImages(c.Wdir)
+	if err != nil {
+		return nil, err
+	}
+
+	srv := &Server{
+		cfg:  c,
+		imgs: imgs,
+		cr:   criu.MakeCriu(),
+	}
+	return srv, nil
+}
+
+// StartIter implements phaul.Remote. It creates the next images directory,
+// starts a criu page server on cfg.Memfd that writes into it, and remembers
+// the page server process so that StopIter can wait for it. From the second
+// iteration on, the previous images directory is passed to criu as the
+// parent, expressed relative to the new directory.
+func (s *Server) StartIter() error {
+	fmt.Printf("S: start iter\n")
+
+	// Must be read before openNextDir advances the cursor.
+	parent := s.imgs.lastImagesDir()
+	dir, err := s.imgs.openNextDir()
+	if err != nil {
+		return err
+	}
+	defer dir.Close()
+
+	opts := &rpc.CriuOpts{
+		LogLevel:    proto.Int32(4),
+		LogFile:     proto.String("ps.log"),
+		Ps:          &rpc.CriuPageServerInfo{Fd: proto.Int32(int32(s.cfg.Memfd))},
+		ImagesDirFd: proto.Int32(int32(dir.Fd())),
+	}
+
+	if parent != "" {
+		abs, err := filepath.Abs(dir.Name())
+		if err != nil {
+			return err
+		}
+		rel, err := filepath.Rel(abs, parent)
+		if err != nil {
+			return err
+		}
+		opts.ParentImg = proto.String(rel)
+	}
+
+	pid, _, err := s.cr.StartPageServerChld(opts)
+	if err != nil {
+		return err
+	}
+
+	s.process, err = os.FindProcess(pid)
+	return err
+}
+
+// StopIter implements phaul.Remote. It waits for the page server process
+// recorded by StartIter to exit. It returns an error when no process has
+// been recorded, when waiting fails with anything other than ECHILD, or
+// when the process exited unsuccessfully. An ECHILD result from Wait is
+// treated as success.
+func (s *Server) StopIter() error {
+	if s.process == nil {
+		return errors.New("No process to stop")
+	}
+
+	state, err := s.process.Wait()
+	switch {
+	case err == nil:
+		if !state.Success() {
+			return fmt.Errorf("page-server failed: %s", state)
+		}
+		return nil
+	case errors.Is(err, unix.ECHILD):
+		return nil
+	default:
+		return err
+	}
+}
+
+// Server-local methods
+
+// LastImagesDir returns the absolute path of the images directory created
+// by the most recent StartIter, or "" if there has been none yet.
+func (s *Server) LastImagesDir() string {
+	last := s.imgs.lastImagesDir()
+	return last
+}
+
+// GetCriu returns the criu.Criu object created by MakePhaulServer, the same
+// one StartIter uses to launch page servers.
+func (s *Server) GetCriu() *criu.Criu {
+	handle := s.cr
+	return handle
+}