A Roundup of Handy Little Python Scripts
2023-05-26
Preface
Everyday life is full of small tasks that are tedious to handle by hand. Knocking out a small Python script for them can save a lot of time, and Python also works well as a general-purpose desk tool for office work (for example, I often use it as a calculator and for character conversions).
Below is a summary of the small Python scripts I have used, kept here for future reference.
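For instance, a few things I routinely do in the interactive interpreter (the values here are just illustrations):

```python
>>> 0x3a * 4 + 7              # quick arithmetic on hex values
239
>>> hex(1000), int('ff', 16)  # number <-> hex-string conversions
('0x3e8', 255)
>>> chr(65), ord('A')         # character <-> code conversions
('A', 65)
```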
Printing a hex string
Purpose: the hex payload inside a communication message is hard to read as raw bytes; printing it as a hex string makes it legible.
```python
# coding=utf-8
# name: myutil.py

def print_hex1(s, prev='0x'):
    # print each byte as a prefixed hex value: 0x02 0x10 ...
    for c in s:
        print '%s%02x' % (prev, ord(c)),
    print

def print_hex(s):
    # print each byte as a bare two-digit hex value: 02 10 ...
    for c in s:
        print '%02x' % ord(c),
    print

print 'myutil'  # module-load marker

def print_hex3(s, prev='0x'):
    # s is already an ASCII hex string such as '0210ff'; re-emit it
    # in prefixed two-character groups: 0x02, 0x10, 0xff,
    for i in range(0, len(s), 2):
        print '%s%s,' % (prev, s[i:i + 2]),
    print
```
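For example, dumping a raw protocol frame looks like this (the frame bytes below are made up for illustration):

```python
frame = '\x02\x10\x00\x3a\xff\x03'  # hypothetical message bytes
print_hex(frame)      # -> 02 10 00 3a ff 03
print_hex1(frame)     # -> 0x02 0x10 0x00 0x3a 0xff 0x03
print_hex3('0210ff')  # -> 0x02, 0x10, 0xff,
```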
Merging files
Back when I worked with microcontrollers, the generated application hex file could not be flashed directly; it first had to be merged with the IAP (in-application programming) bootloader into a single file before burning. Packaging it by hand every time was a chore, so I made a script for it:
```python
# coding=gb18030
#path = 'C:\\Users\\test\\IAP_CZ_v204w.hex'
#file = open(path, 'r')
#for ll in file.readlines():
#    print ll
import time
import os

def prr():
    print 'file combination begin..'

path0 = os.getcwd()
print path0
path = path0
path2 = path0
path += '\\IAP_CZ_v204w.hex'       # the IAP bootloader hex in the current directory
path2 += '\\'
print path
s = raw_input('enter file path:')  # path of the application hex file
path1 = s
print path1
s = raw_input('enter file name:')  # name of the merged output file
path2 += s
path2 += time.strftime('_%y%m%d%H%M%S')
path2 += '.hex'
print path2
prr()
f1 = f2 = None
try:
    # count the lines of the IAP file so its last line
    # (the Intel HEX end-of-file record) can be dropped
    f1 = open(path, 'r')
    count = 0
    for l in f1.readlines():
        count += 1
    f1.close()
    f1 = open(path, 'r')
    f2 = open(path1, 'r')
    f3 = open(path2, 'w')
    # copy all but the final line of the IAP hex file
    while count > 1:
        l = f1.readline()
        f3.write(l)
        count -= 1
    f3.flush()
    # append the application hex file, which keeps its own EOF record
    for l in f2.readlines():
        f3.write(l)
    f3.flush()
    f3.close()
    print 'combination success!'
except Exception, ex:
    print 'exception occurred!'
    print ex
    s = raw_input('press any key to continue...')
finally:
    if f1:
        f1.close()
    if f2:
        f2.close()
s = raw_input('press any key to continue...')
```
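The same idea can be expressed more compactly: stop copying the first file at the Intel HEX end-of-file record (`:00000001FF`) and append the second file verbatim. A minimal sketch (the file names are just placeholders taken from the script above):

```python
def merge_hex(iap_path, app_path, out_path):
    # copy the IAP file up to (but not including) its EOF record,
    # then append the application file, which keeps its own EOF record
    with open(out_path, 'w') as out:
        with open(iap_path) as iap:
            for line in iap:
                if line.strip().upper() == ':00000001FF':
                    break
                out.write(line)
        with open(app_path) as app:
            out.write(app.read())

merge_hex('IAP_CZ_v204w.hex', 'NC_armStaSystem.hex', 'combined.hex')
```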
Multi-threaded gallery download
Downloading the nice comic galleries on the web by hand takes far too long. A quick look at the URL pattern of the pages and a small multi-threaded script take care of it.
```python
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py
'''It is a multi-thread downloading tool

It was developed following axel.
    Author: volans
    E-mail: volansw [at] gmail.com
'''
import sys
import os
import time
import urllib
from threading import Thread

local_proxies = {'http': 'http://131.139.58.200:8080'}


class AxelPython(Thread, urllib.FancyURLopener):
    '''Multi-thread downloading class.

    run() is a virtual method of Thread.
    '''
    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''virtual function in Thread'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # never downloaded anything for this part yet
            self.downloaded = 0

        # rebuild the start point from what is already on disk
        self.startpoint = self.ranges[0] + self.downloaded

        # this part is already complete
        if self.startpoint >= self.ranges[1]:
            print 'Part %s has been downloaded over.' % self.filename
            return

        self.oneTimeSize = 16384  # 16 KB per read
        print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])

        # request only this thread's byte slice
        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
        self.urlhandle = self.open(self.url)

        data = self.urlhandle.read(self.oneTimeSize)
        while data:
            filehandle = open(self.filename, 'ab+')
            filehandle.write(data)
            filehandle.close()
            self.downloaded += len(data)
            data = self.urlhandle.read(self.oneTimeSize)


def GetUrlFileSize(url, proxies={}):
    # read the remote file size from the Content-Length header
    urlHandler = urllib.urlopen(url, proxies=proxies)
    headers = urlHandler.info().headers
    length = 0
    for header in headers:
        if header.find('Length') != -1:
            length = header.split(':')[-1].strip()
            length = int(length)
    return length


def SpliteBlocks(totalsize, blocknumber):
    # split [0, totalsize) into blocknumber byte ranges
    blocksize = totalsize / blocknumber
    ranges = []
    for i in range(0, blocknumber - 1):
        ranges.append((i * blocksize, i * blocksize + blocksize - 1))
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges


def islive(tasks):
    for task in tasks:
        if task.isAlive():
            return True
    return False


def paxel(url, output, blocks=6, proxies=local_proxies):
    '''download url into output using the given number of block threads'''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(0, blocks)]
    filename = ["tmpfile_%d" % i for i in range(0, blocks)]

    tasks = []
    for i in range(0, blocks):
        task = AxelPython(threadname[i], url, filename[i], ranges[i])
        task.setDaemon(True)
        task.start()
        tasks.append(task)

    time.sleep(2)
    while islive(tasks):
        downloaded = sum([task.downloaded for task in tasks])
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # stitch the temporary part files together and clean them up
    filehandle = open(output, 'wb+')
    for i in filename:
        f = open(i, 'rb')
        filehandle.write(f.read())
        f.close()
        try:
            os.remove(i)
        except:
            pass
    filehandle.close()


if __name__ == '__main__':
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    output = '001.jpg'
    paxel(url, output, blocks=4, proxies={})
```
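The heart of the splitting logic is SpliteBlocks: it divides the file into blocks-many byte ranges, and each thread then requests only its slice via the HTTP Range header, resuming from any partial temp file it finds. A quick sanity check of the ranges (a throwaway snippet, not part of the script):

```python
>>> SpliteBlocks(100, 4)
[(0, 24), (25, 49), (50, 74), (75, 99)]
```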
Multi-threaded image download
Download images with multiple threads and save them into a given directory, creating the directory automatically if it does not exist.
```python
# -*- coding: UTF-8 -*-
'''
# An earlier single-threaded version, kept for reference:
import re
import urllib

urls = 'http://xz5.mm667.com/xz82/images/01.jpg'

def getHtml(url):
    page = urllib.urlopen(url)
    html = page.read()
    return html

def getImg(html):
    reg = r'src="(.+?\.jpg)" pic_ext'
    imgre = re.compile(reg)
    imglist = imgre.findall(html)
    x = 0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl, '%s.jpg' % x)
        x = x + 1

html = getHtml("http://tieba.baidu.com/p/2460150866")
getImg(html)
'''
import re
import urllib
import threading
import time
import socket
import os

socket.setdefaulttimeout(30)

# build the gallery URL list from the observed address pattern:
# the host number j cycles through xz1..xz5 while the gallery
# number i runs from 01 to 80
urls = []
j = 0
for i in xrange(1, 81):
    if (i - 1) % 4 == 0:
        j += 1
    if (j - 1) % 5 == 0:
        j = 1
    site = 'http://xz%d.mm667.com/xz%02d/images/' % (j, i)
    urls.append(site)
    print urls[i - 1]

def mkdir(path):
    # strip leading/trailing spaces and a trailing backslash
    path = path.strip()
    path = path.rstrip("\\")
    # create the directory only if it does not exist yet
    isExists = os.path.exists(path)
    if not isExists:
        print path + u' created'
        os.makedirs(path)
        return True
    else:
        print path + u' already exists'
        return False

def cbk(a, b, c):
    '''progress callback for urlretrieve
    @a: number of blocks downloaded so far
    @b: block size
    @c: total size of the remote file
    '''
    per = 100.0 * a * b / c
    if per > 100:
        per = 100
    print '%.2f%%' % per

local = 'd:\\mysite\\pic1\\'
d = 0
mutex = threading.Lock()

class MyThread(threading.Thread):
    def __init__(self, url, name):
        threading.Thread.__init__(self)
        self.url = url
        self.name = name

    def run(self):
        mutex.acquire()
        print
        print 'down from %s' % self.url
        time.sleep(1)
        mutex.release()
        try:
            urllib.urlretrieve(self.url, self.name)
        except Exception, e:
            print e
            time.sleep(1)
            urllib.urlretrieve(self.url, self.name)  # retry once

threads = []
# note: urls holds 80 entries, so urls[84:] is empty as written;
# lower the slice start to pick which galleries to fetch
for u in urls[84:]:
    d += 1
    local = 'd:\\mysite\\pic1\\%d\\' % d
    mkdir(local)
    print 'download begin...'
    for i in xrange(40):
        lcal = local
        url = u
        url += '%03d.jpg' % i
        lcal += '%03d.jpg' % i
        th = MyThread(url, lcal)
        threads.append(th)
        th.start()

#for t in threads:
#    t.join()
print 'over! download finished'
```
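The cbk function above is written as a reporthook for urllib.urlretrieve, but the script never actually passes it in. Wiring it up would look like this (a sketch; the URL and local path are placeholders taken from the script):

```python
# the reporthook receives (blocks_so_far, block_size, total_size),
# which matches cbk's (a, b, c) parameters
urllib.urlretrieve('http://xz1.mm667.com/xz84/images/001.jpg',
                   'd:\\mysite\\pic1\\001.jpg', cbk)
```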
Crawler: scraping information

```python
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Python crawler: scrape information about smart-card companies
Author: yangyongzhen
Version: 0.0.2
Date: 2014-12-14
Language: Python2.7.5
Editor: Sublime Text2
"""
import urllib2, re, string
import threading, Queue, time
import sys
import os
from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding('utf8')

_DATA = []
FILE_LOCK = threading.Lock()
SHARE_Q = Queue.Queue()   # an unbounded task queue
_WORKER_THREAD_NUM = 3    # number of worker threads
_Num = 0                  # total record count


class MyThread(threading.Thread):
    def __init__(self, func, num):
        super(MyThread, self).__init__()  # call the parent constructor
        self.func = func                  # the work function this thread runs
        self.thread_num = num

    def run(self):
        self.func()


def worker():
    global SHARE_Q
    while not SHARE_Q.empty():
        url = SHARE_Q.get()    # take a task from the queue
        my_page = get_page(url)
        find_data(my_page)     # extract the data on this page
        time.sleep(1)
        SHARE_Q.task_done()


def get_page(url):
    """Fetch the HTML of the page at the given url

    Args:
        url: the url of the page to crawl
    Returns:
        the full page HTML (as unicode)
    Raises:
        URLError: on url errors
    """
    try:
        html = urllib2.urlopen(url).read()
        my_page = html.decode("gbk", 'ignore')
    except urllib2.URLError, e:
        if hasattr(e, "code"):
            print "The server couldn't fulfill the request."
            print "Error code: %s" % e.code
        elif hasattr(e, "reason"):
            print "We failed to reach a server."
            print "Reason: %s" % e.reason
    else:
        return my_page

# (the source post is cut off at this point; find_data() and the
#  main entry point are missing)
```
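The missing main presumably fills SHARE_Q with the listing-page URLs and starts _WORKER_THREAD_NUM workers. A minimal sketch of that wiring, under those assumptions (the URL pattern and page count are placeholders, not from the original post):

```python
def main():
    # enqueue the listing pages (placeholder URL pattern and range)
    for page in range(1, 11):
        SHARE_Q.put("http://example.com/list_%d.html" % page)
    # start the workers and wait for them to finish
    threads = [MyThread(worker, i) for i in range(_WORKER_THREAD_NUM)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

if __name__ == '__main__':
    main()
```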