You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 lines
6.1 KiB

  1. // Copyright 2018 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package migrations
  5. import (
  6. "fmt"
  7. "regexp"
  8. "strings"
  9. "code.gitea.io/gitea/modules/log"
  10. "xorm.io/xorm"
  11. )
  12. var topicPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`)
  13. func validateTopic(topic string) bool {
  14. return len(topic) <= 35 && topicPattern.MatchString(topic)
  15. }
  16. func reformatAndRemoveIncorrectTopics(x *xorm.Engine) (err error) {
  17. log.Info("This migration could take up to minutes, please be patient.")
  18. type Topic struct {
  19. ID int64
  20. Name string `xorm:"UNIQUE VARCHAR(25)"`
  21. RepoCount int
  22. CreatedUnix int64 `xorm:"INDEX created"`
  23. UpdatedUnix int64 `xorm:"INDEX updated"`
  24. }
  25. type RepoTopic struct {
  26. RepoID int64 `xorm:"UNIQUE(s)"`
  27. TopicID int64 `xorm:"UNIQUE(s)"`
  28. }
  29. type Repository struct {
  30. ID int64 `xorm:"pk autoincr"`
  31. Topics []string `xorm:"TEXT JSON"`
  32. }
  33. if err := x.Sync2(new(Topic)); err != nil {
  34. return fmt.Errorf("Sync2: %v", err)
  35. }
  36. if err := x.Sync2(new(RepoTopic)); err != nil {
  37. return fmt.Errorf("Sync2: %v", err)
  38. }
  39. sess := x.NewSession()
  40. defer sess.Close()
  41. const batchSize = 100
  42. touchedRepo := make(map[int64]struct{})
  43. delTopicIDs := make([]int64, 0, batchSize)
  44. log.Info("Validating existed topics...")
  45. if err := sess.Begin(); err != nil {
  46. return err
  47. }
  48. for start := 0; ; start += batchSize {
  49. topics := make([]*Topic, 0, batchSize)
  50. if err := x.Cols("id", "name").Asc("id").Limit(batchSize, start).Find(&topics); err != nil {
  51. return err
  52. }
  53. if len(topics) == 0 {
  54. break
  55. }
  56. for _, topic := range topics {
  57. if validateTopic(topic.Name) {
  58. continue
  59. }
  60. log.Info("Incorrect topic: id = %v, name = %q", topic.ID, topic.Name)
  61. topic.Name = strings.Replace(strings.TrimSpace(strings.ToLower(topic.Name)), " ", "-", -1)
  62. ids := make([]int64, 0, 30)
  63. if err := sess.Table("repo_topic").Cols("repo_id").
  64. Where("topic_id = ?", topic.ID).Find(&ids); err != nil {
  65. return err
  66. }
  67. log.Info("Touched repo ids: %v", ids)
  68. for _, id := range ids {
  69. touchedRepo[id] = struct{}{}
  70. }
  71. if validateTopic(topic.Name) {
  72. unifiedTopic := Topic{Name: topic.Name}
  73. exists, err := sess.Cols("id", "name").Get(&unifiedTopic)
  74. log.Info("Exists topic with the name %q? %v, id = %v", topic.Name, exists, unifiedTopic.ID)
  75. if err != nil {
  76. return err
  77. }
  78. if exists {
  79. log.Info("Updating repo_topic rows with topic_id = %v to topic_id = %v", topic.ID, unifiedTopic.ID)
  80. if _, err := sess.Where("topic_id = ? AND repo_id NOT IN "+
  81. "(SELECT rt1.repo_id FROM repo_topic rt1 INNER JOIN repo_topic rt2 "+
  82. "ON rt1.repo_id = rt2.repo_id WHERE rt1.topic_id = ? AND rt2.topic_id = ?)",
  83. topic.ID, topic.ID, unifiedTopic.ID).Update(&RepoTopic{TopicID: unifiedTopic.ID}); err != nil {
  84. return err
  85. }
  86. log.Info("Updating topic `repo_count` field")
  87. if _, err := sess.Exec(
  88. "UPDATE topic SET repo_count = (SELECT COUNT(*) FROM repo_topic WHERE topic_id = ? GROUP BY topic_id) WHERE id = ?",
  89. unifiedTopic.ID, unifiedTopic.ID); err != nil {
  90. return err
  91. }
  92. } else {
  93. log.Info("Updating topic: id = %v, name = %q", topic.ID, topic.Name)
  94. if _, err := sess.Table("topic").ID(topic.ID).
  95. Update(&Topic{Name: topic.Name}); err != nil {
  96. return err
  97. }
  98. continue
  99. }
  100. }
  101. delTopicIDs = append(delTopicIDs, topic.ID)
  102. }
  103. }
  104. if err := sess.Commit(); err != nil {
  105. return err
  106. }
  107. sess.Init()
  108. log.Info("Deleting incorrect topics...")
  109. if err := sess.Begin(); err != nil {
  110. return err
  111. }
  112. log.Info("Deleting 'repo_topic' rows for topics with ids = %v", delTopicIDs)
  113. if _, err := sess.In("topic_id", delTopicIDs).Delete(&RepoTopic{}); err != nil {
  114. return err
  115. }
  116. log.Info("Deleting topics with id = %v", delTopicIDs)
  117. if _, err := sess.In("id", delTopicIDs).Delete(&Topic{}); err != nil {
  118. return err
  119. }
  120. if err := sess.Commit(); err != nil {
  121. return err
  122. }
  123. delRepoTopics := make([]*RepoTopic, 0, batchSize)
  124. log.Info("Checking the number of topics in the repositories...")
  125. for start := 0; ; start += batchSize {
  126. repoTopics := make([]*RepoTopic, 0, batchSize)
  127. if err := x.Cols("repo_id").Asc("repo_id").Limit(batchSize, start).
  128. GroupBy("repo_id").Having("COUNT(*) > 25").Find(&repoTopics); err != nil {
  129. return err
  130. }
  131. if len(repoTopics) == 0 {
  132. break
  133. }
  134. log.Info("Number of repositories with more than 25 topics: %v", len(repoTopics))
  135. for _, repoTopic := range repoTopics {
  136. touchedRepo[repoTopic.RepoID] = struct{}{}
  137. tmpRepoTopics := make([]*RepoTopic, 0, 30)
  138. if err := x.Where("repo_id = ?", repoTopic.RepoID).Find(&tmpRepoTopics); err != nil {
  139. return err
  140. }
  141. log.Info("Repository with id = %v has %v topics", repoTopic.RepoID, len(tmpRepoTopics))
  142. for i := len(tmpRepoTopics) - 1; i > 24; i-- {
  143. delRepoTopics = append(delRepoTopics, tmpRepoTopics[i])
  144. }
  145. }
  146. }
  147. sess.Init()
  148. log.Info("Deleting superfluous topics for repositories (more than 25 topics)...")
  149. if err := sess.Begin(); err != nil {
  150. return err
  151. }
  152. for _, repoTopic := range delRepoTopics {
  153. log.Info("Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v",
  154. repoTopic.RepoID, repoTopic.TopicID)
  155. if _, err := sess.Where("repo_id = ? AND topic_id = ?", repoTopic.RepoID,
  156. repoTopic.TopicID).Delete(&RepoTopic{}); err != nil {
  157. return err
  158. }
  159. if _, err := sess.Exec(
  160. "UPDATE topic SET repo_count = (SELECT repo_count FROM topic WHERE id = ?) - 1 WHERE id = ?",
  161. repoTopic.TopicID, repoTopic.TopicID); err != nil {
  162. return err
  163. }
  164. }
  165. log.Info("Updating repositories 'topics' fields...")
  166. for repoID := range touchedRepo {
  167. topicNames := make([]string, 0, 30)
  168. if err := sess.Table("topic").Cols("name").
  169. Join("INNER", "repo_topic", "repo_topic.topic_id = topic.id").
  170. Where("repo_topic.repo_id = ?", repoID).Desc("topic.repo_count").Find(&topicNames); err != nil {
  171. return err
  172. }
  173. log.Info("Updating 'topics' field for repository with id = %v", repoID)
  174. if _, err := sess.ID(repoID).Cols("topics").
  175. Update(&Repository{Topics: topicNames}); err != nil {
  176. return err
  177. }
  178. }
  179. return sess.Commit()
  180. }