Skip to content

Commit

Permalink
Fix multisync default branch not synced bug (#166)
Browse files Browse the repository at this point in the history
* Fix multisync default branch not synced bug

* Query repositories with batch
  • Loading branch information
pulltheflower authored Nov 5, 2024
1 parent 2053132 commit a1b5eb1
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 2 deletions.
12 changes: 12 additions & 0 deletions builder/store/database/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,3 +630,15 @@ func (s *RepoStore) FindWithBatch(ctx context.Context, batchSize, batch int) ([]
Scan(ctx)
return res, err
}

// FindByRepoSourceWithBatch returns one page of repositories whose source
// column equals repoSource. Rows are ordered by id descending; the page holds
// at most batchSize rows and starts at offset batchSize*batch, so batch is a
// zero-based page index.
func (s *RepoStore) FindByRepoSourceWithBatch(ctx context.Context, repoSource types.RepositorySource, batchSize, batch int) ([]Repository, error) {
	var repos []Repository
	offset := batchSize * batch

	query := s.db.Operator.Core.NewSelect().
		Model(&repos).
		Where("source = ?", repoSource).
		Order("id desc").
		Limit(batchSize).
		Offset(offset)

	if err := query.Scan(ctx); err != nil {
		return repos, err
	}
	return repos, nil
}
105 changes: 105 additions & 0 deletions cmd/csghub-server/cmd/sync/fix_default_branch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package sync

import (
"context"
"fmt"
"log/slog"
"strings"

"github.com/spf13/cobra"
"opencsg.com/csghub-server/builder/multisync"
"opencsg.com/csghub-server/builder/store/database"
"opencsg.com/csghub-server/common/config"
"opencsg.com/csghub-server/common/types"
)

// cmdFixDefaultBranch is a maintenance command that walks, batch by batch,
// every repository whose source is OpenCSG, asks the OpenCSG SaaS API for the
// repository's default branch and stores that value on the local record.
//
// It does nothing when the loaded config has Saas set or MultiSync disabled.
// Failures on a single repository are logged and the run continues with the
// next one; failures to list repositories or to load the sync client setting
// abort the run.
var cmdFixDefaultBranch = &cobra.Command{
	Use:   "fix-default-branch",
	Short: "the cmd to fix the default branch of repos like models and datasets synced from remote server like OpenCSG",
	// PersistentPreRunE loads the configuration, initializes the database and
	// stores the config in the command context under the "config" key so that
	// Run can retrieve it.
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		cfg, err := config.LoadConfig()
		if err != nil {
			return fmt.Errorf("failed to load config,%w", err)
		}

		dbConfig := database.DBConfig{
			Dialect: database.DatabaseDialect(cfg.Database.Driver),
			DSN:     cfg.Database.DSN,
		}

		// NOTE(review): the result of InitDB (if it has one) was never captured
		// here; the previous `if err != nil` after this call only re-tested the
		// already-nil LoadConfig error and could never fire, so it was removed.
		// If InitDB returns an error, it should be assigned and checked.
		database.InitDB(dbConfig)

		cmd.SetContext(context.WithValue(cmd.Context(), "config", cfg))
		return nil
	},
	Run: func(cmd *cobra.Command, args []string) {
		ctx := cmd.Context()
		cfg, ok := ctx.Value("config").(*config.Config)
		if !ok {
			slog.Error("config not found in context")
			return
		}

		// Nothing to fix when the Saas flag is set or multi-sync is disabled.
		if cfg.Saas || !cfg.MultiSync.Enabled {
			return
		}

		// The sync client setting and the remote client do not depend on the
		// batch being processed, so build them once up front.
		syncClientSettingStore := database.NewSyncClientSettingStore()
		setting, err := syncClientSettingStore.First(ctx)
		if err != nil {
			slog.Error("failed to find sync client setting", slog.Any("error", err))
			return
		}
		sc := multisync.FromOpenCSG(cfg.MultiSync.SaasAPIDomain, setting.Token)

		repoStore := database.NewRepoStore()
		const batchSize = 1000
		for batch := 0; ; batch++ {
			repositories, err := repoStore.FindByRepoSourceWithBatch(ctx, types.OpenCSGSource, batchSize, batch)
			if err != nil {
				slog.Error("failed to find repositories from OpenCSG", slog.Any("error", err))
				return
			}
			if len(repositories) == 0 {
				slog.Info("no more repositories found from OpenCSG, quit")
				return
			}

			for _, repository := range repositories {
				// The remote API is addressed by the path without the local
				// OpenCSG prefix.
				repoPath := strings.TrimPrefix(repository.Path, types.OpenCSGPrefix)

				var defaultBranch string
				switch repository.RepositoryType {
				case types.ModelRepo:
					modelInfo, err := sc.ModelInfo(ctx, types.SyncVersion{RepoPath: repoPath})
					if err != nil {
						slog.Error("failed to get model info from OpenCSG Saas", slog.String("repo_path", repoPath), slog.Any("error", err))
						continue
					}
					defaultBranch = modelInfo.DefaultBranch
				case types.DatasetRepo:
					datasetInfo, err := sc.DatasetInfo(ctx, types.SyncVersion{RepoPath: repoPath})
					if err != nil {
						slog.Error("failed to get dataset info from OpenCSG Saas", slog.String("repo_path", repoPath), slog.Any("error", err))
						continue
					}
					defaultBranch = datasetInfo.DefaultBranch
				default:
					// Only model and dataset info can be queried here; leave any
					// other repository type untouched instead of overwriting its
					// default branch with an empty string.
					continue
				}

				// Never replace a stored branch with an empty value.
				if defaultBranch == "" {
					slog.Warn("empty default branch returned from OpenCSG Saas, skip update", slog.String("repo_path", repoPath))
					continue
				}

				repository.DefaultBranch = defaultBranch
				if _, err := repoStore.UpdateRepo(ctx, repository); err != nil {
					slog.Error("failed to update repository", slog.String("repo_path", repoPath), slog.Any("error", err))
				}
			}
		}
	},
}
1 change: 1 addition & 0 deletions cmd/csghub-server/cmd/sync/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
// init registers this package's subcommands on the parent Cmd.
func init() {
	// add subcommands here
	Cmd.AddCommand(
		cmdSyncAsClient,
		cmdFixDefaultBranch,
	)
}

var Cmd = &cobra.Command{
Expand Down
4 changes: 2 additions & 2 deletions component/multi_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func (c *MultiSyncComponent) createLocalDataset(ctx context.Context, m *types.Da
Private: m.Private,
Readme: m.Readme,
// License: req.License,
// DefaultBranch: gitRepo.DefaultBranch,
DefaultBranch: m.DefaultBranch,
RepositoryType: types.DatasetRepo,
Source: types.OpenCSGSource,
SyncStatus: types.SyncStatusPending,
Expand Down Expand Up @@ -330,7 +330,7 @@ func (c *MultiSyncComponent) createLocalModel(ctx context.Context, m *types.Mode
Private: m.Private,
Readme: m.Readme,
// License: req.License,
// DefaultBranch: gitRepo.DefaultBranch,
DefaultBranch: m.DefaultBranch,
RepositoryType: types.ModelRepo,
Source: types.OpenCSGSource,
SyncStatus: types.SyncStatusPending,
Expand Down

0 comments on commit a1b5eb1

Please sign in to comment.