Python 常用代码片段

2019-04-15 17:12发布

# coding: utf-8
# Common snippets: download a file, find files with glob, process a directory.
import glob
import os
import re


# ---- Download a file --------------------------------------------------------

def download_file(url, filename):
    """Fetch *url* over HTTP and write the response body to *filename*.

    ``requests`` is imported here rather than at module level so that the
    other snippets remain importable when that third-party package is not
    installed.
    """
    import requests

    response = requests.get(url)
    # ``content`` is the public accessor for the body bytes; the original
    # snippet read the private ``_content`` attribute.
    with open(filename, 'wb') as target:
        target.write(response.content)


# ---- Process every file under a directory -----------------------------------

def main(fpath):
    """Walk *fpath* recursively, printing and processing every file found."""
    for root, _dirs, files in os.walk(fpath):
        for filename in files:
            # os.path.join picks the right separator for the platform; the
            # original hard-coded a backslash between the two parts.
            full_file = os.path.join(root, filename)
            print(full_file)
            process_a_file(full_file)


def process_a_file(afile):
    """Append selected lines of *afile* to ``out.txt`` in the working directory.

    A header ``"NEW DOC <afile> "`` is written first.  Every line that starts
    with a double quote acts as a switch: copying is turned on when the line
    starts with ``"toBI"`` and off otherwise.  While copying is on, each
    following non-switch line is written (right-stripped) followed by a
    single space.
    """
    print(afile)
    with open(afile, 'r') as ofile, open("out.txt", 'a') as wfile:
        # NOTE(review): the " " separators here and below may originally have
        # been newline escapes lost when the snippet was published -- confirm
        # before changing the output format.
        wfile.write("NEW DOC %s " % afile)
        do_write = False
        for eachline in ofile:
            aline = eachline.rstrip()
            # '"*"' matches at the start of any line that begins with at
            # least one double quote.
            if re.match('"*"', aline) is not None:
                do_write = re.match('"toBI"', aline) is not None
            elif do_write:
                wfile.write(aline)
                wfile.write(" ")


if __name__ == "__main__":
    download_file(
        'http://i.pegpic.com/pic/028326/koqn0d5tgankoqn0d5tgan.jpg',
        'D://a.jpg',
    )

    # Find files: every .jpg exactly one directory level below E:/Picture.
    print(glob.glob(r"E:/Picture/*/*.jpg"))
    # Every .py file in the parent directory (relative path).
    print(glob.glob(r'../*.py'))

    # NOTE(review): 'D:Temp' is a drive-relative path on Windows; it may have
    # been written as D:\Temp originally -- confirm.
    main('D:Temp')
# coding: utf-8
# File statistics: word, phone and prosodic-phrase counts for a corpus file
# whose lines alternate between a sentence line and a phone-transcription line.
import os
import re
import sys

# Frequency tables filled in by main().  The literal entries only illustrate
# the key/value shape; main() clears every table before processing.
dic_word = {'test': 1}             # word -> occurrences
dic_phone = {'HH': 1}              # phone -> occurrences
dic_b_phone = {'HH HA': 1}         # phone pair -> occurrences (keys built as "p1-p2")
dic_t_phone = {'HH HA HA': 1}      # phone triple -> occurrences (keys built as "p1-p2-p3")
dic_sen_len = {8: 10}              # e.g. 10 sentences are 8 words long
dic_rhythm_word_len = {2: 5}       # prosodic-phrase length in words -> occurrences
dic_rhythm_phone_len = {2: 5}      # prosodic-phrase length in phones -> occurrences

# Punctuation-followed-by-whitespace patterns used by the clean_* helpers.
# The listing as published had lost its backslashes: it contained r""s",
# which is not valid Python, and r".s", which would delete any character
# followed by the letter "s".  The escapes are restored here.
# NOTE(review): reconstructed patterns -- confirm against the original script.
_SLASH_WS = re.compile(r"/\s")
_DOT_WS = re.compile(r"\.\s")
_COMMA_WS = re.compile(r",\s")
_BANG_WS = re.compile(r"!\s")
_QUOTE_WS = re.compile(r'"\s')


def main(fpath):
    """Reset all tables, scan *fpath* and write the statistics log files.

    Lines are processed in pairs: odd-numbered lines (1-based) are sentence
    lines, even-numbered lines are phone lines.
    """
    for table in (dic_word, dic_phone, dic_b_phone, dic_t_phone,
                  dic_sen_len, dic_rhythm_word_len, dic_rhythm_phone_len):
        table.clear()

    # The context manager closes the input file, which the original never did.
    with open(fpath, 'r') as ofile:
        for order, aline in enumerate(ofile, 1):
            if order % 2 == 1:
                # Skip the first 7 characters of a sentence line
                # (presumably a sentence-id prefix -- confirm against the
                # corpus format).
                process_word_line(aline[7:])
            else:
                process_phone_line(aline)

    analysis_out()


def process_word_line(aline):
    """Update phrase-length, sentence-length and word counts from one line."""
    aline = aline.lower()
    # Phrase statistics need the '/' and '%' markers, so they run before
    # clean_aline() removes them.
    rhythm_word_calc(aline)
    words = _split_tokens(clean_aline(aline))
    dic_add(dic_sen_len, len(words))
    for word in words:
        dic_add(dic_word, word)


def process_phone_line(aline):
    """Update phrase-length, phone, bi-phone and tri-phone counts."""
    # Phrase statistics need the '/' and '.' markers, so they run before
    # clean_aline() removes them.
    rhythm_phone_calc(aline)
    phones = _split_tokens(clean_aline(aline))
    for phone in phones:
        dic_add(dic_phone, phone)
    # Adjacent pairs.
    for i in range(len(phones) - 1):
        dic_add(dic_b_phone, "%s-%s" % (phones[i], phones[i + 1]))
    # Adjacent triples.
    for i in range(len(phones) - 2):
        dic_add(dic_t_phone,
                "%s-%s-%s" % (phones[i], phones[i + 1], phones[i + 2]))


def rhythm_word_calc(aline):
    """Count prosodic-phrase lengths in words.

    A token containing '/' or '%' closes a phrase; its length is the number
    of tokens since the previous closing token.
    """
    tokens = _split_tokens(clean_aline_word_rhythm(aline))
    _count_phrase_lengths(tokens, ('/', '%'), dic_rhythm_word_len)


def rhythm_phone_calc(aline):
    """Count prosodic-phrase lengths in phones.

    A token containing '/' or '.' closes a phrase; its length is the number
    of tokens since the previous closing token.
    """
    tokens = _split_tokens(clean_aline_phone_rhythm(aline))
    _count_phrase_lengths(tokens, ('/', '.'), dic_rhythm_phone_len)


def analysis_out():
    """Write every frequency table to its log file in the working directory."""
    output_a_dic(dic_word, 'word.log')
    output_a_dic(dic_phone, 'phone.log')
    output_a_dic(dic_sen_len, 'sen_len.log')
    output_a_dic(dic_b_phone, 'dic_bi_phone.log')
    output_a_dic(dic_t_phone, 'dic_tri_phone.log')
    output_a_dic(dic_rhythm_word_len, 'dic_rhythm_word_len.log')
    output_a_dic(dic_rhythm_phone_len, 'dic_rhythm_phone_len.log')


def output_a_dic(a_dic, filename):
    """Write *a_dic* to *filename* as "key count " records, highest count first."""
    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    ranked = sorted(a_dic.items(), key=lambda item: item[1], reverse=True)
    with open(filename, 'w') as wfile:
        for key, count in ranked:
            # NOTE(review): records are separated by single spaces, so the
            # whole file is one line; the separators may originally have
            # been tab/newline escapes -- confirm before changing.
            wfile.write('%s %d ' % (key, count))


# ---- helpers ----------------------------------------------------------------

def _split_tokens(aline):
    """Split *aline* on single spaces and drop the empty pieces."""
    return [token for token in aline.split(' ') if token != ""]


def _count_phrase_lengths(tokens, marks, table):
    """Add to *table* the length of each phrase closed by a token with a mark."""
    pre_pos = -1
    for i, token in enumerate(tokens):
        if any(mark in token for mark in marks):
            dic_add(table, i - pre_pos)
            pre_pos = i


def _apply_subs(aline, substitutions):
    """Apply ``(pattern, replacement)`` pairs to *aline* in the given order."""
    for pattern, replacement in substitutions:
        aline = pattern.sub(replacement, aline)
    return aline


def clean_aline(aline):
    """Strip phrase markers and punctuation so only plain tokens remain."""
    aline = _apply_subs(aline, (
        (_SLASH_WS, " "),
        (_DOT_WS, ""),
        (_COMMA_WS, ""),
        (_BANG_WS, ""),
        (_QUOTE_WS, ""),
    ))
    return aline.replace("%", " ").strip()


def clean_aline_word_rhythm(aline):
    """Strip punctuation but keep the '/' and '%' phrase markers."""
    aline = _apply_subs(aline, (
        (_DOT_WS, ""),
        (_COMMA_WS, " "),
        (_BANG_WS, ""),
        (_QUOTE_WS, ""),
    ))
    return aline.replace('"', '').strip()


def clean_aline_phone_rhythm(aline):
    """Strip punctuation and glue '/' and '.' markers onto the preceding phone."""
    aline = _apply_subs(aline, (
        (_COMMA_WS, " "),
        (_BANG_WS, ""),
        (_QUOTE_WS, ""),
    ))
    aline = aline.replace(" /", "/").replace(" .", ".").strip()
    # A trailing '.' guarantees that the last phrase on the line is closed.
    return aline + '.'


def dic_add(adic, akey):
    """Increment the counter stored under *akey* in *adic*, starting at 1."""
    # dict.get replaces has_key(), which was removed in Python 3.
    adic[akey] = adic.get(akey, 0) + 1


if __name__ == "__main__":
    if len(sys.argv) == 1:
        # Default input.  Raw string: '\U' in a normal literal is a syntax
        # error on Python 3.
        main(r'C:\Users\huangzhiqiang\PycharmProjects\untitled\ef4_6k.txt')
    elif len(sys.argv) == 2:
        print(sys.argv[1])
        main(str(sys.argv[1]))
    else:
        print("parameters error ")