黑马程序员技术交流社区

标题: 批量检查Python脚本注释量是否达标和规范的方法 [打印本页]

作者: 专注的一批 时间: 2019-10-15 15:36
标题: 批量检查Python脚本注释量是否达标和规范的方法
听吴军大佬说，在Google，写代码的规范是有严格规定的，那种规定近乎苛刻，不仅要求给变量起名字要有特定的格式，单元测试要覆盖 100% 的代码，程序的注释要占到篇幅的三分之一左右，就连多一个空格都不允许。
任何不符合这样不近人情的规矩的代码，是不能提交的。
刚进入 Google 的新人，前一个月基本都是练习代码规范。
本脚本检查的是 Python，Python 中单行注释以 # 开头，多行注释用三个单引号 ''' 或者三个双引号 """ 将注释括起来。
如果需要检查其他编程语言，只需要修改正则表达式的匹配部分即可。
检查是否达标
import os, sys, re

"""
本程序通过计算python源码中"注释的行数",检查源码是否合格
1.本程序可对"#"开头的注释,以及成对的"三双引号"源码进行计数（Python中单行注释以 # 开头，多行注释用三个单引号 \'\'\' 或者三个双引号 \"\"\" 将注释括起来）
2.本程序使用了正则匹配
3.注释占源码比例 = 源码内注释行数/源码总行数
"""

# Triple-quote delimiters, built by repetition so that this script does not
# mistake its own source for the start of a block comment when it scans itself.
_BLOCK_DELIMITERS = ('"' * 3, "'" * 3)


def count_comment_lines(lines):
    """Count the comment lines in a sequence of Python source lines.

    A line counts as a comment when it is a ``#`` line (optionally indented)
    or when it lies inside a triple-quoted block, delimiter lines included.
    A block that opens and closes on the same line counts as one line, and a
    ``#`` line inside a block is counted once, not twice.

    The scan is purely textual: every triple-quoted string, including one
    assigned to a variable, is treated as a comment.

    Args:
        lines: Iterable of source lines (with or without trailing newlines).

    Returns:
        The number of comment lines, or ``None`` when a triple-quoted block
        is opened but never closed.
    """
    total = 0
    open_delim = None  # delimiter of the block we are currently inside
    for line in lines:
        if open_delim is not None:
            total += 1
            if open_delim in line:
                open_delim = None
            continue

        if line.lstrip().startswith("#"):
            total += 1
            continue

        # Pick whichever delimiter appears first on the line.
        found = [(line.find(d), d) for d in _BLOCK_DELIMITERS if d in line]
        if not found:
            continue
        total += 1
        start, delim = min(found)
        # A second occurrence on the same line closes the block immediately.
        if delim not in line[start + len(delim):]:
            open_delim = delim

    return None if open_delim is not None else total


# List everything in the current working directory.
dir_paths = os.listdir(os.getcwd())
print(dir_paths)

dir_path = ""

for dir_path_pure in dir_paths:
    # Only real ``.py`` files; a bare "py" suffix (e.g. "happy") is not enough.
    if not dir_path_pure.endswith(".py"):
        continue
    dir_path = os.path.join(os.getcwd(), dir_path_pure)
    if not os.path.isfile(dir_path):
        continue

    # Python source defaults to UTF-8; undecodable bytes are replaced because
    # they do not affect line counting.
    with open(dir_path, "r", encoding="utf-8", errors="replace") as my_code:
        my_lines = my_code.readlines()

    code_sum = len(my_lines)
    serial_num_sum = count_comment_lines(my_lines)

    if serial_num_sum is None:
        # An unclosed triple-quoted block makes the count meaningless.
        print("%s源码注释不规范!自动跳过!\n" % dir_path_pure)
        continue

    if code_sum != 0:
        exp_rate = 100 * (serial_num_sum / code_sum)
        print("%s| 注释的行数为:%d,总行数为%d | 注释率为%d%%\n" % (dir_path_pure, serial_num_sum, code_sum, exp_rate))
    else:
        print('%s| 没有任何代码\n' % dir_path_pure)
检查是否规范
#encoding:utf-8

'''
可能性如下：
1.单独一行注释：
2.有代码有注释
3 }else{//
4.多行注释
/**
   * //获取所有配件重量总和
   * @param index 将要装备的第几个配件槽
   * @param unitId 将要装备配件ID
   */
   5.
     1: "tankmuzzle",//炮管 11
满足要求：
所有的注释都单独一行，除了JSON的注释
保持与下一行同样缩进
'''
import os

# Paths of the input files to process (appended to by doDealCode)
fileList = []
# Output path for each entry of fileList, looked up by the same index
writeFileList = []
# Marker that starts a single-line comment in the files being processed
codeAnnotationDec = '//'

def dealCodeWithSpace(lines, willSaveList):
    """Split a "code + trailing comment" line so the comment gets its own line.

    Two entries are appended to ``willSaveList`` in place: first the comment,
    indented like the code line, then the code with the comment removed.

    Args:
        lines: One source line; expected to contain ``codeAnnotationDec``
            after some code.
        willSaveList: List of output lines to append to.
    """
    markerPos = lines.find(codeAnnotationDec)
    if markerPos < 0:
        # Nothing to split. Without this guard the -1 index would slice off
        # the last character and emit it as a bogus "comment".
        willSaveList.append(lines)
        return

    # Reuse the line's own leading whitespace so tab-indented code keeps its
    # tabs instead of getting one space per tab.
    indent = lines[:len(lines) - len(lines.lstrip())]

    # Comment first, at the same indentation as the code it describes.
    willSaveList.append(indent + lines[markerPos:].rstrip() + '\n')
    # Then the code itself, without the whitespace that preceded the comment.
    willSaveList.append(lines[:markerPos].rstrip() + '\n')

def _dealSingleLine(lines, willSaveList):
    """Append the rewritten form of one source line to ``willSaveList``."""
    markerPos = lines.find(codeAnnotationDec)

    # No comment on this line (plain code or a blank line): keep it as is.
    if markerPos < 0:
        willSaveList.append(lines)
        return

    stripped = lines.strip()

    # The whole line is a comment: keep it as is.
    if stripped.find(codeAnnotationDec) == 0:
        willSaveList.append(lines)
        return

    # Code and comment share the line. A line containing ":" is treated as a
    # JSON-style entry, where the inline comment is allowed to stay.
    if lines.find(":") > -1:
        willSaveList.append(lines)
        return

    codePart = lines[:markerPos].rstrip()
    elsePos = lines.find("else")
    # "}else{//..." : the line must additionally be broken before "else".
    # Only applies when "else" really occurs in the code part; other lines
    # ending in "{" (while/for/function ...) take the generic path below.
    isBraceElse = (
        codePart.endswith('{')
        and not stripped.startswith("else")
        and not stripped.startswith("if")
        and -1 < elsePos < markerPos
    )
    if isBraceElse:
        indent = lines[:len(lines) - len(lines.lstrip())]
        # 1. Everything before "else" (the closing brace) gets its own line.
        willSaveList.append(lines[:elsePos].rstrip() + '\n')
        # 2. "else{//..." is re-indented like the brace and split normally.
        dealCodeWithSpace(indent + lines[elsePos:], willSaveList)
    else:
        # "else{//c", "if(...){//c" and plain "code;//c" all split the same way.
        dealCodeWithSpace(lines, willSaveList)


def doDealCode(readFilePath, writeFile):
    """Rewrite source files so that trailing comments sit on their own line.

    ``readFilePath`` may be a single file or a directory. For a directory,
    every regular file directly inside it is processed and written under the
    same name into the directory ``writeFile`` (created if missing). For a
    single file, ``writeFile`` is the output file, or a directory to place
    the output in.

    The collected paths are recorded in the module-level ``fileList`` and
    ``writeFileList``; every pair recorded there is processed, including
    pairs left over from earlier calls.

    NOTE(review): files are opened with the platform default encoding, as
    before; confirm the encoding of the input files.

    Args:
        readFilePath: File or directory to read from.
        writeFile: File or directory to write to.
    """
    if os.path.isfile(readFilePath):
        # A single input file is queued directly.
        if os.path.isdir(writeFile):
            writeFile = os.path.join(writeFile, os.path.basename(readFilePath))
        fileList.append(readFilePath)
        writeFileList.append(writeFile)
    elif os.path.isdir(readFilePath):
        if not os.path.isdir(writeFile):
            os.makedirs(writeFile)
        # Queue every regular file of the directory; sub-directories cannot
        # be opened as files and are skipped.
        for singleFile in os.listdir(readFilePath):
            sourcePath = os.path.join(readFilePath, singleFile)
            if os.path.isfile(sourcePath):
                fileList.append(sourcePath)
                writeFileList.append(os.path.join(writeFile, singleFile))

    # Pair inputs with outputs positionally; looking the index up by value
    # would pick the wrong output when the same input path is queued twice.
    for readSingleFile, writeSingleFile in zip(fileList, writeFileList):
        # Rewritten lines of the current file, in output order.
        willSaveList = []
        with open(readSingleFile, 'r') as fileLine:
            for lines in fileLine:
                _dealSingleLine(lines, willSaveList)

        # Write the result under the same name in the output location.
        with open(writeSingleFile, 'w') as tempWrite:
            tempWrite.writelines(willSaveList)
        print("------ WRITE SUC------", writeSingleFile)
        print(writeSingleFile)

# print("******* SUC *******");

# Path to read the input files from
readFilePath = "./readFile"
# Path to write the processed files to
writeFile = "./writeFile"
# Start processing the files
doDealCode(readFilePath,writeFile)

欢迎光临黑马程序员技术交流社区 (http://bbs.itheima.com/)

黑马程序员IT技术论坛 X3.2