check-relative-doc-links.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #!/usr/bin/env python
  2. import os
  3. import sys
  4. import re
  5. SOURCE_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
  6. DOCS_DIR = os.path.join(SOURCE_ROOT, 'docs')
  7. def main():
  8. os.chdir(SOURCE_ROOT)
  9. filepaths = []
  10. totalDirs = 0
  11. try:
  12. for root, dirs, files in os.walk(DOCS_DIR):
  13. totalDirs += len(dirs)
  14. for f in files:
  15. if f.endswith('.md'):
  16. filepaths.append(os.path.join(root, f))
  17. except KeyboardInterrupt:
  18. print('Keyboard interruption. Please try again.')
  19. return
  20. totalBrokenLinks = 0
  21. for path in filepaths:
  22. totalBrokenLinks += getBrokenLinks(path)
  23. print('Parsed through ' + str(len(filepaths)) +
  24. ' files within docs directory and its ' +
  25. str(totalDirs) + ' subdirectories.')
  26. print('Found ' + str(totalBrokenLinks) + ' broken relative links.')
  27. return totalBrokenLinks
  28. def getBrokenLinks(filepath):
  29. currentDir = os.path.dirname(filepath)
  30. brokenLinks = []
  31. try:
  32. f = open(filepath, 'r')
  33. lines = f.readlines()
  34. except KeyboardInterrupt:
  35. print('Keyboard interruption whle parsing. Please try again.')
  36. finally:
  37. f.close()
  38. regexLink = re.compile('\[(.*?)\]\((?P<links>(.*?))\)')
  39. links = []
  40. for line in lines:
  41. matchLinks = regexLink.search(line)
  42. if matchLinks:
  43. relativeLink = matchLinks.group('links')
  44. if not str(relativeLink).startswith('http'):
  45. links.append(relativeLink)
  46. for link in links:
  47. sections = link.split('#')
  48. if len(sections) < 2:
  49. if not os.path.isfile(os.path.join(currentDir, link)):
  50. brokenLinks.append(link)
  51. elif str(link).startswith('#'):
  52. if not checkSections(sections, lines):
  53. brokenLinks.append(link)
  54. else:
  55. tempFile = os.path.join(currentDir, sections[0])
  56. if os.path.isfile(tempFile):
  57. try:
  58. newFile = open(tempFile, 'r')
  59. newLines = newFile.readlines()
  60. except KeyboardInterrupt:
  61. print('Keyboard interruption whle parsing. Please try again.')
  62. finally:
  63. newFile.close()
  64. if not checkSections(sections, newLines):
  65. brokenLinks.append(link)
  66. else:
  67. brokenLinks.append(link)
  68. print_errors(filepath, brokenLinks)
  69. return len(brokenLinks)
  70. def checkSections(sections, lines):
  71. sectionHeader = sections[1].replace('-', '')
  72. regexSectionTitle = re.compile('# (?P<header>.*)')
  73. for line in lines:
  74. matchHeader = regexSectionTitle.search(line)
  75. if matchHeader:
  76. matchHeader = filter(str.isalnum, str(matchHeader.group('header')))
  77. if matchHeader.lower() == sectionHeader:
  78. return True
  79. return False
  80. def print_errors(filepath, brokenLink):
  81. if brokenLink:
  82. print "File Location: " + filepath
  83. for link in brokenLink:
  84. print "\tBroken links: " + link
  85. if __name__ == '__main__':
  86. sys.exit(main())