You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

319 lines
9.0 KiB

  1. // Copyright 2017 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package git
  5. import (
  6. "bufio"
  7. "context"
  8. "fmt"
  9. "os/exec"
  10. "path"
  11. "runtime"
  12. "strconv"
  13. "strings"
  14. "sync"
  15. "time"
  16. )
  // Thresholds that decide when the untargeted `git log` scan stops and leaves
  // the remaining entries to the targeted-search goroutines. The scan stops
  // once both conditions below hold at a commit boundary.

  // deferToTargetedSearchColdStreak is the minimum number of consecutive
  // commits the untargeted scan must have processed without resolving any
  // entry before it is allowed to stop.
  const deferToTargetedSearchColdStreak = 5

  // deferToTargetedSearchNumRemainingEntries is the largest number of still
  // unresolved entries at which the untargeted scan is allowed to stop.
  const deferToTargetedSearchNumRemainingEntries = 12
  25. // getCommitsInfoState shared state while getting commit info for entries
  26. type getCommitsInfoState struct {
  27. lock sync.Mutex
  28. /* read-only fields, can be read without the mutex */
  29. // entries and entryPaths are read-only after initialization, so they can
  30. // safely be read without the mutex
  31. entries []*TreeEntry
  32. // set of filepaths to get info for
  33. entryPaths map[string]struct{}
  34. treePath string
  35. headCommit *Commit
  36. /* mutable fields, must hold mutex to read or write */
  37. // map from filepath to commit
  38. commits map[string]*Commit
  39. // set of filepaths that have been or are being searched for in a target search
  40. targetedPaths map[string]struct{}
  41. }
  42. func (state *getCommitsInfoState) numRemainingEntries() int {
  43. state.lock.Lock()
  44. defer state.lock.Unlock()
  45. return len(state.entries) - len(state.commits)
  46. }
  47. // getTargetEntryPath Returns the next path for a targeted-searching thread to
  48. // search for, or returns the empty string if nothing left to search for
  49. func (state *getCommitsInfoState) getTargetedEntryPath() string {
  50. var targetedEntryPath string
  51. state.lock.Lock()
  52. defer state.lock.Unlock()
  53. for _, entry := range state.entries {
  54. entryPath := path.Join(state.treePath, entry.Name())
  55. if _, ok := state.commits[entryPath]; ok {
  56. continue
  57. } else if _, ok = state.targetedPaths[entryPath]; ok {
  58. continue
  59. }
  60. targetedEntryPath = entryPath
  61. state.targetedPaths[entryPath] = struct{}{}
  62. break
  63. }
  64. return targetedEntryPath
  65. }
  66. // repeatedly perform targeted searches for unpopulated entries
  67. func targetedSearch(state *getCommitsInfoState, done chan error) {
  68. for {
  69. entryPath := state.getTargetedEntryPath()
  70. if len(entryPath) == 0 {
  71. done <- nil
  72. return
  73. }
  74. command := NewCommand("rev-list", "-1", state.headCommit.ID.String(), "--", entryPath)
  75. output, err := command.RunInDir(state.headCommit.repo.Path)
  76. if err != nil {
  77. done <- err
  78. return
  79. }
  80. id, err := NewIDFromString(strings.TrimSpace(output))
  81. if err != nil {
  82. done <- err
  83. return
  84. }
  85. commit, err := state.headCommit.repo.getCommit(id)
  86. if err != nil {
  87. done <- err
  88. return
  89. }
  90. state.update(entryPath, commit)
  91. }
  92. }
  93. func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState {
  94. entryPaths := make(map[string]struct{}, len(entries))
  95. for _, entry := range entries {
  96. entryPaths[path.Join(treePath, entry.Name())] = struct{}{}
  97. }
  98. if treePath = path.Clean(treePath); treePath == "." {
  99. treePath = ""
  100. }
  101. return &getCommitsInfoState{
  102. entries: entries,
  103. entryPaths: entryPaths,
  104. commits: make(map[string]*Commit, len(entries)),
  105. targetedPaths: make(map[string]struct{}, len(entries)),
  106. treePath: treePath,
  107. headCommit: headCommit,
  108. }
  109. }
  110. // GetCommitsInfo gets information of all commits that are corresponding to these entries
  111. func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
  112. state := initGetCommitInfoState(tes, commit, treePath)
  113. if err := getCommitsInfo(state); err != nil {
  114. return nil, err
  115. }
  116. if len(state.commits) < len(state.entryPaths) {
  117. return nil, fmt.Errorf("could not find commits for all entries")
  118. }
  119. commitsInfo := make([][]interface{}, len(tes))
  120. for i, entry := range tes {
  121. commit, ok := state.commits[path.Join(treePath, entry.Name())]
  122. if !ok {
  123. return nil, fmt.Errorf("could not find commit for %s", entry.Name())
  124. }
  125. switch entry.Type {
  126. case ObjectCommit:
  127. subModuleURL := ""
  128. if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
  129. return nil, err
  130. } else if subModule != nil {
  131. subModuleURL = subModule.URL
  132. }
  133. subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
  134. commitsInfo[i] = []interface{}{entry, subModuleFile}
  135. default:
  136. commitsInfo[i] = []interface{}{entry, commit}
  137. }
  138. }
  139. return commitsInfo, nil
  140. }
  141. func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) {
  142. if rawEntryPath[0] == '"' {
  143. var err error
  144. rawEntryPath, err = strconv.Unquote(rawEntryPath)
  145. if err != nil {
  146. return rawEntryPath, err
  147. }
  148. }
  149. var entryNameStartIndex int
  150. if len(state.treePath) > 0 {
  151. entryNameStartIndex = len(state.treePath) + 1
  152. }
  153. if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 {
  154. return rawEntryPath[:entryNameStartIndex+index], nil
  155. }
  156. return rawEntryPath, nil
  157. }
  158. // update report that the given path was last modified by the given commit.
  159. // Returns whether state.commits was updated
  160. func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool {
  161. if _, ok := state.entryPaths[entryPath]; !ok {
  162. return false
  163. }
  164. var updated bool
  165. state.lock.Lock()
  166. defer state.lock.Unlock()
  167. if _, ok := state.commits[entryPath]; !ok {
  168. state.commits[entryPath] = commit
  169. updated = true
  170. }
  171. return updated
  172. }
  173. const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
  174. func getCommitsInfo(state *getCommitsInfoState) error {
  175. ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
  176. defer cancel()
  177. args := []string{"log", state.headCommit.ID.String(), getCommitsInfoPretty, "--name-status", "-c"}
  178. if len(state.treePath) > 0 {
  179. args = append(args, "--", state.treePath)
  180. }
  181. cmd := exec.CommandContext(ctx, "git", args...)
  182. cmd.Dir = state.headCommit.repo.Path
  183. readCloser, err := cmd.StdoutPipe()
  184. if err != nil {
  185. return err
  186. }
  187. if err := cmd.Start(); err != nil {
  188. return err
  189. }
  190. // it's okay to ignore the error returned by cmd.Wait(); we expect the
  191. // subprocess to sometimes have a non-zero exit status, since we may
  192. // prematurely close stdout, resulting in a broken pipe.
  193. defer cmd.Wait()
  194. numThreads := runtime.NumCPU()
  195. done := make(chan error, numThreads)
  196. for i := 0; i < numThreads; i++ {
  197. go targetedSearch(state, done)
  198. }
  199. scanner := bufio.NewScanner(readCloser)
  200. err = state.processGitLogOutput(scanner)
  201. // it is important that we close stdout here; if we do not close
  202. // stdout, the subprocess will keep running, and the deffered call
  203. // cmd.Wait() may block for a long time.
  204. if closeErr := readCloser.Close(); closeErr != nil && err == nil {
  205. err = closeErr
  206. }
  207. for i := 0; i < numThreads; i++ {
  208. doneErr := <-done
  209. if doneErr != nil && err == nil {
  210. err = doneErr
  211. }
  212. }
  213. return err
  214. }
  215. func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error {
  216. // keep a local cache of seen paths to avoid acquiring a lock for paths
  217. // we've already seen
  218. seenPaths := make(map[string]struct{}, len(state.entryPaths))
  219. // number of consecutive commits without any finds
  220. coldStreak := 0
  221. var commit *Commit
  222. var err error
  223. for scanner.Scan() {
  224. line := scanner.Text()
  225. if len(line) == 0 { // in-between commits
  226. numRemainingEntries := state.numRemainingEntries()
  227. if numRemainingEntries == 0 {
  228. break
  229. }
  230. if coldStreak >= deferToTargetedSearchColdStreak &&
  231. numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
  232. // stop this untargeted search, and let the targeted-search threads
  233. // finish the work
  234. break
  235. }
  236. continue
  237. }
  238. if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit
  239. // look for the last tab, since for copies (C) and renames (R) two
  240. // filenames are printed: src, then dest
  241. tabIndex := strings.LastIndexByte(line, '\t')
  242. if tabIndex < 1 {
  243. return fmt.Errorf("misformatted line: %s", line)
  244. }
  245. entryPath, err := state.cleanEntryPath(line[tabIndex+1:])
  246. if err != nil {
  247. return err
  248. }
  249. if _, ok := seenPaths[entryPath]; !ok {
  250. if state.update(entryPath, commit) {
  251. coldStreak = 0
  252. }
  253. seenPaths[entryPath] = struct{}{}
  254. }
  255. continue
  256. }
  257. // a new commit
  258. commit, err = parseCommitInfo(line)
  259. if err != nil {
  260. return err
  261. }
  262. coldStreak++
  263. }
  264. return scanner.Err()
  265. }
  266. // parseCommitInfo parse a commit from a line of `git log` output. Expects the
  267. // line to be formatted according to getCommitsInfoPretty.
  268. func parseCommitInfo(line string) (*Commit, error) {
  269. if len(line) < 43 {
  270. return nil, fmt.Errorf("invalid git output: %s", line)
  271. }
  272. ref, err := NewIDFromString(line[:40])
  273. if err != nil {
  274. return nil, err
  275. }
  276. spaceIndex := strings.IndexByte(line[41:], ' ')
  277. if spaceIndex < 0 {
  278. return nil, fmt.Errorf("invalid git output: %s", line)
  279. }
  280. unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex])
  281. if err != nil {
  282. return nil, err
  283. }
  284. message := line[spaceIndex+42:]
  285. return &Commit{
  286. ID: ref,
  287. CommitMessage: message,
  288. Committer: &Signature{
  289. When: time.Unix(int64(unixSeconds), 0),
  290. },
  291. }, nil
  292. }