Demuxes the OneDrive and Google Drive files.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
drive-demuxer/drive-demuxer.go

437 lines
9.4 KiB

package main
import (
"bytes"
"flag"
"io"
"log"
"os"
"path"
"sync"
"github.com/cheggaaa/pb/v3"
)
// TopLevel carries the settings and shared state that every Step of a run
// points at.
type TopLevel struct {
	// LeftInput and RightInput are the roots of the two trees being compared.
	LeftInput, RightInput string

	// LeftOutput, CombinedOutput and RightOutput are the roots that receive
	// the results; OutputPath selects between them by Direction.
	LeftOutput, CombinedOutput, RightOutput string

	// OpenHandles is used as a counting semaphore: ListChildren and
	// AreFilesIdentical send on it before touching the filesystem and
	// receive from it when they are done, so its capacity bounds how many
	// of those calls run at once.
	OpenHandles chan int

	// DryRun makes every mutating method return before it changes anything
	// on disk.
	DryRun bool

	// Bar is the progress bar that Walk grows and increments.
	Bar *pb.ProgressBar
}
// Step is one directory level of the walk: the shared TopLevel settings plus
// the location of that directory relative to the input and output roots.
type Step struct {
// TopLevel is embedded, so its fields (roots, DryRun, Bar, OpenHandles) are
// reachable directly on a Step and are shared by every Step that points at it.
*TopLevel
// Subpath is joined between a root directory and a child name by InputPath
// and OutputPath. main starts the walk with an empty Subpath.
Subpath string
}
// Direction selects which tree a path refers to.
type Direction byte

const (
	// LEFT is the left-hand tree (valid for input and output paths).
	LEFT = Direction(iota)
	// RIGHT is the right-hand tree (valid for input and output paths).
	RIGHT
	// COMBINED is the merged tree; OutputPath accepts it, InputPath panics on it.
	COMBINED
)
// FileState is the classification CheckState assigns to a path.
type FileState byte

const (
	// MISSING means the path does not exist.
	MISSING = FileState(iota)
	// FILE means the path is a regular file.
	FILE
	// DIRECTORY means the path is a directory.
	DIRECTORY
	// UNKNOWN means the path could not be inspected, or exists but is
	// neither a regular file nor a directory.
	UNKNOWN
)
// CheckState classifies filePath.
//
// A path that does not exist yields (MISSING, nil). Any other os.Stat failure
// yields (UNKNOWN, err). A path that exists but is neither a directory nor a
// regular file yields (UNKNOWN, nil).
func (s *Step) CheckState(filePath string) (out FileState, err error) {
	stat, statErr := os.Stat(filePath)
	if statErr != nil {
		if os.IsNotExist(statErr) {
			return MISSING, nil
		}
		return UNKNOWN, statErr
	}

	mode := stat.Mode()
	switch {
	case mode.IsDir():
		return DIRECTORY, nil
	case mode.IsRegular():
		return FILE, nil
	}
	return UNKNOWN, nil
}
// InputPath returns the location of child inside this step's sub-directory
// of the selected input tree. side must be LEFT or RIGHT; any other value
// panics.
func (s *Step) InputPath(child string, side Direction) string {
	switch side {
	case LEFT:
		return path.Join(s.LeftInput, s.Subpath, child)
	case RIGHT:
		return path.Join(s.RightInput, s.Subpath, child)
	}
	panic("Unexpected side for input path")
}
// OutputPath returns the location of child inside this step's sub-directory
// of the selected output tree. side may be LEFT, RIGHT or COMBINED; any other
// value panics.
func (s *Step) OutputPath(child string, side Direction) string {
	switch side {
	case LEFT:
		return path.Join(s.LeftOutput, s.Subpath, child)
	case RIGHT:
		return path.Join(s.RightOutput, s.Subpath, child)
	case COMBINED:
		return path.Join(s.CombinedOutput, s.Subpath, child)
	}
	panic("Unexpected side for output path")
}
// Separate keeps both versions of child apart: the left input copy goes to
// the left output tree and the right input copy goes to the right output
// tree. Each half is handled by SeparateLeft / SeparateRight, which log
// their own failures.
func (s *Step) Separate(child string) {
s.SeparateLeft(child)
s.SeparateRight(child)
}
// SeparateLeft moves child from the left input tree to the same relative
// location in the left output tree, creating the destination's parent
// directories first.
//
// Nothing happens in dry-run mode. Failures are logged rather than returned,
// and a rename that fails because something does not exist is ignored
// silently.
func (s *Step) SeparateLeft(child string) {
	if s.DryRun {
		return
	}

	src, dst := s.InputPath(child, LEFT), s.OutputPath(child, LEFT)

	parent := path.Dir(dst)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		log.Printf("Failed creating %s: %s\n", parent, mkErr)
	}

	// The move is attempted even when the directory could not be created.
	mvErr := os.Rename(src, dst)
	if mvErr == nil || os.IsNotExist(mvErr) {
		return
	}
	log.Printf("Failed moving %s to %s: %s\n", src, dst, mvErr)
}
// SeparateRight moves child from the right input tree to the same relative
// location in the right output tree, creating the destination's parent
// directories first.
//
// Nothing happens in dry-run mode. Failures are logged rather than returned,
// and a rename that fails because something does not exist is ignored
// silently.
func (s *Step) SeparateRight(child string) {
	if s.DryRun {
		return
	}

	src, dst := s.InputPath(child, RIGHT), s.OutputPath(child, RIGHT)

	parent := path.Dir(dst)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		log.Printf("Failed creating %s: %s\n", parent, mkErr)
	}

	// The move is attempted even when the directory could not be created.
	mvErr := os.Rename(src, dst)
	if mvErr == nil || os.IsNotExist(mvErr) {
		return
	}
	log.Printf("Failed moving %s to %s: %s\n", src, dst, mvErr)
}
// Combine records that child is the same on both sides: the left input copy
// is moved into the combined output tree and the now-redundant right input
// copy is deleted.
//
// Nothing happens in dry-run mode. Failures are logged rather than returned.
// The right copy is deleted only after the left copy has actually been moved;
// if the destination directory cannot be created or the move fails for any
// reason (including the left file having disappeared), both inputs are left
// exactly as they are so no data is lost and a later run can retry.
func (s *Step) Combine(child string) {
	if s.DryRun {
		return
	}

	leftInPath := s.InputPath(child, LEFT)
	rightInPath := s.InputPath(child, RIGHT)
	combinedOutPath := s.OutputPath(child, COMBINED)

	combinedBase := path.Dir(combinedOutPath)
	if err := os.MkdirAll(combinedBase, 0755); err != nil {
		log.Printf("Failed creating %s: %s\n", combinedBase, err)
		return
	}

	if err := os.Rename(leftInPath, combinedOutPath); err != nil {
		log.Printf("Failed moving %s to %s: %s\n", leftInPath, combinedOutPath, err)
		// Without a copy in the combined tree the right file may be the
		// only one left, so it must not be removed.
		return
	}

	if err := os.Remove(rightInPath); err != nil && !os.IsNotExist(err) {
		log.Printf("Failed removing %s: %s\n", rightInPath, err)
	}
}
// MakeCombinedDir creates the directory for child (and any missing parents)
// in the combined output tree. It does nothing in dry-run mode; a failure is
// logged rather than returned.
func (s *Step) MakeCombinedDir(child string) {
	if s.DryRun {
		return
	}
	target := s.OutputPath(child, COMBINED)
	if mkErr := os.MkdirAll(target, 0755); mkErr != nil {
		log.Printf("Failed creating %s: %s\n", target, mkErr)
	}
}
// RemoveInputDirs deletes child from the left and then the right input tree
// using os.Remove. It does nothing in dry-run mode. A path that is already
// gone is ignored; any other failure is logged and the other side is still
// attempted.
func (s *Step) RemoveInputDirs(child string) {
	if s.DryRun {
		return
	}
	for _, side := range []Direction{LEFT, RIGHT} {
		target := s.InputPath(child, side)
		if rmErr := os.Remove(target); rmErr != nil && !os.IsNotExist(rmErr) {
			log.Printf("Failed removing %s: %s\n", target, rmErr)
		}
	}
}
// ListChildren returns the union of the entry names found in this step's
// left and right input directories, each name once, in no particular order.
//
// It occupies one OpenHandles slot while it runs. A directory that cannot be
// listed is logged; whatever entries os.ReadDir did return for it are still
// used.
func (s *Step) ListChildren() []string {
	s.OpenHandles <- 1
	defer func() { <-s.OpenHandles }()

	seen := make(map[string]bool)
	for _, dir := range []string{s.InputPath("", LEFT), s.InputPath("", RIGHT)} {
		entries, listErr := os.ReadDir(dir)
		if listErr != nil {
			log.Printf("Failed listing %s: %s\n", dir, listErr)
		}
		for _, entry := range entries {
			seen[entry.Name()] = true
		}
	}

	names := make([]string, 0, len(seen))
	for name := range seen {
		names = append(names, name)
	}
	return names
}
// AreFilesIdentical reports whether child has byte-for-byte identical
// contents in the left and right input trees.
//
// It occupies one OpenHandles slot while it runs. Any failure to stat, open
// or read either file is logged and reported as "not identical".
func (s *Step) AreFilesIdentical(child string) bool {
	s.OpenHandles <- 1
	defer func() {
		<-s.OpenHandles
	}()

	leftInPath := s.InputPath(child, LEFT)
	rightInPath := s.InputPath(child, RIGHT)

	// Cheap rejection first: files of different length cannot be identical.
	leftInfo, err := os.Stat(leftInPath)
	if err != nil {
		log.Printf("Error statting path %s: %s\n", leftInPath, err)
		return false
	}
	rightInfo, err := os.Stat(rightInPath)
	if err != nil {
		log.Printf("Error statting path %s: %s\n", rightInPath, err)
		return false
	}
	if leftInfo.Size() != rightInfo.Size() {
		return false
	}

	leftFile, err := os.OpenFile(leftInPath, os.O_RDONLY, 0)
	if err != nil {
		log.Printf("Error opening path %s: %s\n", leftInPath, err)
		return false
	}
	defer func() {
		if err := leftFile.Close(); err != nil {
			log.Printf("Error closing %s: %s\n", leftInPath, err)
		}
	}()

	rightFile, err := os.OpenFile(rightInPath, os.O_RDONLY, 0)
	if err != nil {
		log.Printf("Error opening path %s: %s\n", rightInPath, err)
		return false
	}
	defer func() {
		if err := rightFile.Close(); err != nil {
			log.Printf("Error closing %s: %s\n", rightInPath, err)
		}
	}()

	// Compare in chunks of at most 4 MiB. The buffers are sized to the file
	// so small files do not pay for two full-size buffers, and are never
	// empty because io.ReadFull on an empty buffer would return (0, nil)
	// forever.
	const maxChunk = 4 << 20
	chunk := leftInfo.Size()
	if chunk > maxChunk {
		chunk = maxChunk
	}
	if chunk < 1 {
		chunk = 1
	}
	leftData := make([]byte, chunk)
	rightData := make([]byte, chunk)

	for {
		// io.ReadFull keeps reading until the buffer is full or the file
		// ends, so a short read on one side cannot make identical files
		// look different. It reports end of file as io.EOF (nothing read)
		// or io.ErrUnexpectedEOF (partial final chunk).
		leftRead, leftErr := io.ReadFull(leftFile, leftData)
		if leftErr != nil && leftErr != io.EOF && leftErr != io.ErrUnexpectedEOF {
			log.Printf("Error reading %s: %s\n", leftInPath, leftErr)
			return false
		}
		rightRead, rightErr := io.ReadFull(rightFile, rightData)
		if rightErr != nil && rightErr != io.EOF && rightErr != io.ErrUnexpectedEOF {
			log.Printf("Error reading %s: %s\n", rightInPath, rightErr)
			return false
		}

		if leftRead != rightRead {
			return false
		}
		if !bytes.Equal(leftData[:leftRead], rightData[:rightRead]) {
			return false
		}

		// Only end-of-file errors can reach this point.
		leftDone, rightDone := leftErr != nil, rightErr != nil
		if leftDone || rightDone {
			return leftDone && rightDone
		}
	}
}
// Walk processes every child of this step's directory and returns once all
// of them are finished.
//
// Each child is handled on its own goroutine and ticks the progress bar once
// when it completes. The decision per child is:
//
//   - right side cannot be inspected or is UNKNOWN: log it;
//   - right side MISSING: SeparateLeft;
//   - left side cannot be inspected or is UNKNOWN: log it;
//   - left side MISSING: SeparateRight;
//   - right is a FILE: Combine if left is also a FILE with identical
//     contents, otherwise Separate;
//   - both are directories: create the combined directory, walk the
//     sub-directory recursively, then remove the input directories;
//   - right is a directory and left is not: Separate.
func (s *Step) Walk() {
	var pending sync.WaitGroup

	// launch runs task on a new goroutine and increments the bar once the
	// task has returned.
	launch := func(task func()) {
		pending.Add(1)
		go func() {
			defer pending.Done()
			task()
			s.Bar.Increment()
		}()
	}

	names := s.ListChildren()
	s.Bar.AddTotal(int64(len(names)))

	for _, name := range names {
		// Per-iteration copy so the closures below never share the range
		// variable, whatever Go version builds this.
		child := name

		rightPath := s.InputPath(child, RIGHT)
		rightState, rightErr := s.CheckState(rightPath)
		switch {
		case rightErr != nil:
			launch(func() {
				log.Printf("Error statting path %s: %s\n", rightPath, rightErr)
			})
			continue
		case rightState == UNKNOWN:
			launch(func() {
				log.Printf("Unknown stat value for path %s\n", rightPath)
			})
			continue
		case rightState == MISSING:
			launch(func() {
				s.SeparateLeft(child)
			})
			continue
		}

		leftPath := s.InputPath(child, LEFT)
		leftState, leftErr := s.CheckState(leftPath)
		switch {
		case leftErr != nil:
			launch(func() {
				log.Printf("Error statting path %s: %s\n", leftPath, leftErr)
			})
			continue
		case leftState == UNKNOWN:
			launch(func() {
				log.Printf("Unknown stat value for path %s\n", leftPath)
			})
			continue
		case leftState == MISSING:
			launch(func() {
				s.SeparateRight(child)
			})
			continue
		}

		// Both sides exist and are either a file or a directory.
		switch {
		case rightState == FILE:
			launch(func() {
				if leftState == FILE && s.AreFilesIdentical(child) {
					s.Combine(child)
				} else {
					s.Separate(child)
				}
			})
		case rightState == DIRECTORY && leftState == DIRECTORY:
			substep := &Step{
				TopLevel: s.TopLevel,
				Subpath:  path.Join(s.Subpath, child),
			}
			launch(func() {
				s.MakeCombinedDir(child)
				substep.Walk()
				s.RemoveInputDirs(child)
			})
		case rightState == DIRECTORY:
			launch(func() {
				s.Separate(child)
			})
		default:
			panic("Unexpected state")
		}
	}

	pending.Wait()
}
// main parses the command-line flags, walks the two input trees from their
// roots and sorts every entry into the left, right or combined output tree.
//
// Dry-run is on by default, so nothing is moved unless -dry-run=false is
// passed.
func main() {
	settings := TopLevel{
		// Capacity of the semaphore that bounds concurrent directory
		// listings and file comparisons.
		OpenHandles: make(chan int, 450),
	}
	flag.StringVar(&settings.LeftInput,
		"left-input", "./input/left", "The name of the left side of the input.")
	flag.StringVar(&settings.RightInput,
		"right-input", "./input/right", "The name of the right side of the input.")
	flag.StringVar(&settings.LeftOutput,
		"left-output", "./output/left", "The name of the left side of the output.")
	flag.StringVar(&settings.CombinedOutput,
		"combined-output", "./output/combined", "The name of the combined side of the output.")
	flag.StringVar(&settings.RightOutput,
		"right-output", "./output/right", "The name of the right side of the output.")
	flag.BoolVar(&settings.DryRun,
		"dry-run", true, "True if no actual operation should be performed.")
	flag.Parse()

	// Start the bar only after the flags are known to be valid, so -h or a
	// bad flag does not exit with a running progress bar. The initial total
	// of 1 stands for the root step itself; Walk adds its children.
	settings.Bar = pb.StartNew(1)

	(&Step{
		TopLevel: &settings,
		Subpath:  "",
	}).Walk()

	settings.Bar.Increment()
	// Finish stops the bar so its final state is written before exit.
	settings.Bar.Finish()
}