diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 17:08:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 17:08:40 +0000 |
commit | db4355c97c4d09cabe0ff0d522acff7b672d18df (patch) | |
tree | 078ed2ef0cd31b9992ba7a9e94ad05bb5e4b6077 /phaul | |
parent | Initial commit. (diff) | |
download | golang-github-checkpoint-restore-go-criu-db4355c97c4d09cabe0ff0d522acff7b672d18df.tar.xz golang-github-checkpoint-restore-go-criu-db4355c97c4d09cabe0ff0d522acff7b672d18df.zip |
Adding upstream version 6.3.0+ds1. (refs: upstream/6.3.0+ds1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'phaul')
-rw-r--r-- | phaul/api.go | 45 | ||||
-rw-r--r-- | phaul/client.go | 141 | ||||
-rw-r--r-- | phaul/images.go | 42 | ||||
-rw-r--r-- | phaul/server.go | 108 |
4 files changed, 336 insertions, 0 deletions
diff --git a/phaul/api.go b/phaul/api.go new file mode 100644 index 0000000..f675f80 --- /dev/null +++ b/phaul/api.go @@ -0,0 +1,45 @@ +package phaul + +import ( + "github.com/checkpoint-restore/go-criu/v6" +) + +// Config is the configuration which is passed around. +// +// Pid is what we migrate. +// Memfd is the file descriptor via which criu can transfer memory pages. +// Wdir is the directory where phaul can put images and other stuff. +type Config struct { + Pid int + Memfd int + Wdir string +} + +// Remote is the RPC interface +// between the phaul Client and Server. When the client +// calls anything on this one, the corresponding method +// should be called on the Server object. +type Remote interface { + StartIter() error + StopIter() error +} + +// Local is the interface to local classes. +// The client calls them when it needs something on the source node. +// +// Methods: +// +// - DumpCopyRestore() is called on the client side when the +// pre-iterations are over and it's time to do the full dump, +// copy images and restore them on the server side. +// For the whole time this method is executing, the victim tree is +// frozen on the client. Returning nil kills the tree, an error +// unfreezes it and resumes. The criu argument is a +// pointer to the created criu.Criu object on which the client +// may call Dump(). 
The requirement on opts passed are: +// set Ps.Fd to c.Memfd +// set ParentImg to lastClientImagesPath +// set TrackMem to true +type Local interface { + DumpCopyRestore(criu *criu.Criu, c Config, lastClientImagesPath string) error +} diff --git a/phaul/client.go b/phaul/client.go new file mode 100644 index 0000000..fd5e877 --- /dev/null +++ b/phaul/client.go @@ -0,0 +1,141 @@ +package phaul + +import ( + "fmt" + "path/filepath" + + "github.com/checkpoint-restore/go-criu/v6" + "github.com/checkpoint-restore/go-criu/v6/crit" + "github.com/checkpoint-restore/go-criu/v6/crit/images" + "github.com/checkpoint-restore/go-criu/v6/rpc" + "google.golang.org/protobuf/proto" +) + +const ( + minPagesWritten uint64 = 64 + maxIters int = 8 + maxGrowDelta int64 = 32 +) + +// Client holds the local and remote endpoints and the config used by Migrate. +type Client struct { + local Local + remote Remote + cfg Config +} + +// MakePhaulClient is the main entry point. +// The caller should create the client object by +// passing local, remote and config here. See the comments on the corresponding +// interfaces/structs for explanation. 
+// +// Then call client.Migrate() and enjoy :) +func MakePhaulClient(l Local, r Remote, c Config) (*Client, error) { + return &Client{local: l, remote: r, cfg: c}, nil +} + +func isLastIter(iter int, stats *images.DumpStatsEntry, prevStats *images.DumpStatsEntry) bool { + if iter >= maxIters { + fmt.Printf("`- max iters reached\n") + return true + } + + pagesWritten := stats.GetPagesWritten() + if pagesWritten < minPagesWritten { + fmt.Printf("`- tiny pre-dump (%d) reached\n", int(pagesWritten)) + return true + } + + pagesDelta := int64(pagesWritten) - int64(prevStats.GetPagesWritten()) + if pagesDelta >= maxGrowDelta { + fmt.Printf("`- grow iter (%d) reached\n", int(pagesDelta)) + return true + } + + return false +} + +// Migrate runs pre-dump iterations until isLastIter reports the last one, then calls DumpCopyRestore between a final StartIter/StopIter pair. +func (pc *Client) Migrate() error { + criu := criu.MakeCriu() + psi := rpc.CriuPageServerInfo{ + Fd: proto.Int32(int32(pc.cfg.Memfd)), + } + opts := &rpc.CriuOpts{ + Pid: proto.Int32(int32(pc.cfg.Pid)), + LogLevel: proto.Int32(4), + LogFile: proto.String("pre-dump.log"), + Ps: &psi, + } + + err := criu.Prepare() + if err != nil { + return err + } + + defer criu.Cleanup() + + imgs, err := preparePhaulImages(pc.cfg.Wdir) + if err != nil { + return err + } + prevStats := &images.DumpStatsEntry{} + iter := 0 + + for { + err = pc.remote.StartIter() + if err != nil { + return err + } + + prevP := imgs.lastImagesDir() + imgDir, err := imgs.openNextDir() + if err != nil { + return err + } + + opts.ImagesDirFd = proto.Int32(int32(imgDir.Fd())) + if prevP != "" { + opts.ParentImg = proto.String(prevP) + } + + err = criu.PreDump(opts, nil) + imgDir.Close() + if err != nil { + return err + } + + iter++ + + err = pc.remote.StopIter() + if err != nil { + return err + } + + // Get dump statistics with crit. NOTE(review): Entries[0] and the type assertion below are unchecked and panic on unexpected input. + c := crit.New(filepath.Join(imgDir.Name(), "stats-dump"), "", "", false, false) + statsImg, err := c.Decode() + if err != nil { + return err + } + stats := statsImg.Entries[0].Message.(*images.StatsEntry).GetDump() + + if isLastIter(iter, 
stats, prevStats) { + break + } + + prevStats = stats + } + + err = pc.remote.StartIter() + if err == nil { + prevP := imgs.lastImagesDir() + err = pc.local.DumpCopyRestore(criu, pc.cfg, prevP) + err2 := pc.remote.StopIter() + if err == nil { + err = err2 + } + } + + return err +} diff --git a/phaul/images.go b/phaul/images.go new file mode 100644 index 0000000..f355a86 --- /dev/null +++ b/phaul/images.go @@ -0,0 +1,42 @@ +package phaul + +import ( + "fmt" + "os" + "path/filepath" +) + +type image struct { + cursor int + dir string +} + +//nolint:unparam // suppress "error is always nil" warning +func preparePhaulImages(wdir string) (*image, error) { + return &image{dir: wdir}, nil +} + +func (i *image) getPath(idx int) string { + return fmt.Sprintf(i.dir+"/%d", idx) +} + +func (i *image) openNextDir() (*os.File, error) { + ipath := i.getPath(i.cursor) + err := os.Mkdir(ipath, 0o700) + if err != nil { + return nil, err + } + + i.cursor++ + return os.Open(ipath) +} + +func (i *image) lastImagesDir() string { + var ret string + if i.cursor == 0 { + ret = "" + } else { + ret, _ = filepath.Abs(i.getPath(i.cursor - 1)) + } + return ret +} diff --git a/phaul/server.go b/phaul/server.go new file mode 100644 index 0000000..da17756 --- /dev/null +++ b/phaul/server.go @@ -0,0 +1,108 @@ +package phaul + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/checkpoint-restore/go-criu/v6" + "github.com/checkpoint-restore/go-criu/v6/rpc" + "golang.org/x/sys/unix" + "google.golang.org/protobuf/proto" +) + +// Server implements Remote; it tracks the config, image dirs, criu handle and current page-server process. +type Server struct { + cfg Config + imgs *image + cr *criu.Criu + process *os.Process +} + +// MakePhaulServer is the server-side entry point. +// Make the server with the config and call the Remote +// methods on it upon client requests. 
+func MakePhaulServer(c Config) (*Server, error) { + img, err := preparePhaulImages(c.Wdir) + if err != nil { + return nil, err + } + + cr := criu.MakeCriu() + + return &Server{imgs: img, cfg: c, cr: cr}, nil +} + +// StartIter (phaul.Remote method) opens the next images dir and starts a criu page server for it, remembering its process. +func (s *Server) StartIter() error { + fmt.Printf("S: start iter\n") + psi := rpc.CriuPageServerInfo{ + Fd: proto.Int32(int32(s.cfg.Memfd)), + } + opts := &rpc.CriuOpts{ + LogLevel: proto.Int32(4), + LogFile: proto.String("ps.log"), + Ps: &psi, + } + + prevP := s.imgs.lastImagesDir() + imgDir, err := s.imgs.openNextDir() + if err != nil { + return err + } + defer imgDir.Close() + + opts.ImagesDirFd = proto.Int32(int32(imgDir.Fd())) + if prevP != "" { + p, err := filepath.Abs(imgDir.Name()) + if err != nil { + return err + } + rel, err := filepath.Rel(p, prevP) + if err != nil { + return err + } + opts.ParentImg = proto.String(rel) + } + + pid, _, err := s.cr.StartPageServerChld(opts) + if err != nil { + return err + } + + s.process, err = os.FindProcess(pid) + if err != nil { + return err + } + + return nil +} + +// StopIter (phaul.Remote method) waits for the page server started by StartIter; an ECHILD from Wait is ignored, an unsuccessful exit is an error. +func (s *Server) StopIter() error { + if s.process == nil { + return errors.New("No process to stop") + } + state, err := s.process.Wait() + if err != nil && !errors.Is(err, unix.ECHILD) { + return err + } + + if err == nil && !state.Success() { + return fmt.Errorf("page-server failed: %s", state) + } + return nil +} + +// Server-local methods + +// LastImagesDir returns the path of the most recently created images directory, or an empty string if there is none yet. +func (s *Server) LastImagesDir() string { + return s.imgs.lastImagesDir() +} + +// GetCriu returns the criu handle created by MakePhaulServer. +func (s *Server) GetCriu() *criu.Criu { + return s.cr +} |