Web 压缩常见方式有 gzip、deflate、br(Brotli 是一种由 Google 开发的全新压缩算法,可以有效减小传输内容大小,加速分发效果)等。
这里重点说下gzip的使用。
如果是使用python的requests请求数据,则无论请求头是否加入
Accept-Encoding:gzip
返回的数据如果是gzip,都会自动解压处理。
但是如果用 pycurl,就需要显式设置 pycurl.ENCODING(对应 libcurl 的 CURLOPT_ACCEPT_ENCODING),libcurl 才会自动解压返回的数据,示例如下:
# Modules required by the helpers below: io and urllib.parse (stdlib), pycurl.


# POST
def curl_post(url, data):
    """Send ``data`` as a form-encoded POST to ``url`` and return the body.

    Setting ``pycurl.ENCODING`` makes libcurl request gzip/deflate and
    decompress the response itself, so the returned bytes are already decoded.

    Args:
        url: Target URL.
        data: Mapping (or sequence of pairs) accepted by
            ``urllib.parse.urlencode``.

    Returns:
        The response body as ``bytes``.

    Raises:
        pycurl.error: If the transfer fails.
    """
    buf = io.BytesIO()
    crl = pycurl.Curl()
    try:
        crl.setopt(pycurl.URL, url)
        crl.setopt(pycurl.FOLLOWLOCATION, 1)
        crl.setopt(pycurl.MAXREDIRS, 5)
        # The original set both timeouts twice; only the later values
        # (3 s connect, 120 s total) ever took effect, so keep just those.
        crl.setopt(pycurl.CONNECTTIMEOUT, 3)
        crl.setopt(pycurl.TIMEOUT, 120)
        # Ask for compressed responses and let libcurl decompress them.
        crl.setopt(pycurl.ENCODING, 'gzip,deflate')
        crl.setopt(pycurl.HTTPPROXYTUNNEL, 1)
        crl.setopt(pycurl.NOSIGNAL, 1)
        # NOTE(review): the explicit User-Agent header below is what the
        # request carries; confirm whether this option is still needed.
        crl.setopt(pycurl.USERAGENT, "dhgu hoho")
        crl.setopt(pycurl.HTTPHEADER, [
            "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)",
            # Was a bare '*/*', which is not a valid "Name: value" header.
            "Accept: */*",
        ])
        crl.setopt(pycurl.POSTFIELDS, urllib.parse.urlencode(data))
        crl.setopt(pycurl.WRITEFUNCTION, buf.write)
        crl.perform()
        return buf.getvalue()
    finally:
        # Release the handle and buffer even when perform() raises.
        crl.close()
        buf.close()


# GET
def curl_get(url):
    """Fetch ``url`` with GET, print the HTTP status code, return the body.

    Setting ``pycurl.ENCODING`` makes libcurl request gzip/deflate and
    decompress the response itself, so the returned bytes are already decoded.

    Args:
        url: Target URL.

    Returns:
        The response body as ``bytes``.

    Raises:
        pycurl.error: If the transfer fails.
    """
    buf = io.BytesIO()
    curl_handle = pycurl.Curl()
    try:
        curl_handle.setopt(pycurl.URL, url)
        curl_handle.setopt(pycurl.FOLLOWLOCATION, 1)
        curl_handle.setopt(pycurl.MAXREDIRS, 5)
        curl_handle.setopt(pycurl.CONNECTTIMEOUT, 100)
        curl_handle.setopt(pycurl.TIMEOUT, 120)
        # Ask for compressed responses and let libcurl decompress them.
        # Do NOT also add "Accept-Encoding: gzip" to HTTPHEADER by hand.
        curl_handle.setopt(pycurl.ENCODING, 'gzip,deflate')
        curl_handle.setopt(pycurl.NOSIGNAL, 1)
        curl_handle.setopt(pycurl.HTTPHEADER, [
            "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)",
            # Was a bare '*/*', which is not a valid "Name: value" header.
            "Accept: */*",
        ])
        # Size cap in bytes (about 20 MB) passed to libcurl's MAXFILESIZE.
        curl_handle.setopt(pycurl.MAXFILESIZE, 20485760)
        curl_handle.setopt(pycurl.WRITEFUNCTION, buf.write)
        curl_handle.perform()
        # The original set HEADERFUNCTION to an undefined ``getheader`` here,
        # after perform(); that raised NameError, so the call was removed.
        print(curl_handle.getinfo(pycurl.HTTP_CODE))
        return buf.getvalue()
    finally:
        # Release the handle and buffer even when perform() raises.
        curl_handle.close()
        buf.close()
# Test: fetch the gzip demo endpoint with the GET helper defined above.
# (The original called ``httpget``, which is not defined anywhere.)
ret = curl_get("http://test.cn/gziptest.php")
print(type(ret))
print(ret)
注意:如果在 pycurl 的请求头中手动加入了 Accept-Encoding: gzip,却没有设置 pycurl.ENCODING 让 libcurl 解压响应,那么拿到的是未解压的 gzip 数据,表现为乱码无法解析。反之,如果不手动添加该请求头,只设置 pycurl.ENCODING,那么接口支持 gzip 时返回的数据会被自动解压,接口不支持 gzip 时数据原样返回,无需额外处理。
另外,也可以单独对数据进行gzip解压处理,特别是文件gzip压缩后,可以使用以下方法处理:
import gzip
import io


def gzdecode(data):
    """Decompress gzip-compressed bytes and return the original payload.

    Args:
        data: A complete gzip stream as ``bytes`` (for example the content
            of a ``.gz`` file or an undecoded HTTP response body).

    Returns:
        The decompressed data as ``bytes``.

    Raises:
        OSError: If ``data`` is not valid gzip data.
    """
    # GzipFile needs a binary file object, so wrap the bytes in BytesIO.
    with gzip.GzipFile(fileobj=io.BytesIO(data)) as gziper:
        # The original returned the still-compressed input here by mistake.
        return gziper.read()
PHP模拟gzip返回数据:
<?php
// NOTE(review): the opening of this snippet was lost when the page was
// extracted; the "<?php" tag, the start of the first statement and the
// '<pre>' literal are reconstructed. The first echo is assumed to have been
// commented out like the neighbouring debug lines. Confirm against the
// original post.
//echo json_encode(['code'=>0, 'data'=>'测试1'], JSON_UNESCAPED_UNICODE);

//if( !headers_sent() &&
//    extension_loaded("zlib") &&
//    strstr($_SERVER["HTTP_ACCEPT_ENCODING"],"gzip"))
//{
// Turn on zlib output compression (level 4) before any output is sent.
ini_set('zlib.output_compression', 'On');
ini_set('zlib.output_compression_level', '4');
//}

//echo '<pre>';
echo json_encode(['code'=>0, 'data'=>'测试'], JSON_UNESCAPED_UNICODE);
转载请注明:永盟博客 » python配置传输压缩gzip优化