公司使用的电商分析平台需要英文版,商品 SPU 数据需要翻译。调研了几个平台的翻译 API:有道、Google、百度,最后选择了百度(并不是因为百度翻译得好,而是百度不要钱!)
示例代码如下:
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import json
import httplib
import md5
import urllib
import random

# Credentials for the translate API: both feed the request signature, and
# appid is additionally sent as a query parameter. Left blank in this listing.
appid = ''
secretKey = ''
# Module-level placeholder. make_file() binds its own local httpClient
# (it has no `global` statement), so this value is never replaced by it.
httpClient = None
# Language codes sent as the `from` and `to` query parameters.
fromLang = 'zh'
toLang = 'en'

infos = [] # strings waiting to be translated; make_file() iterates this list
# Scratch dict reused for every output line: make_file() overwrites
# results["data"] with the latest translation before serializing it.
results = {}

def make_file(filename):
    """Translate every entry of ``infos`` and write the results as JSON lines.

    Each entry of the module-level ``infos`` list is stripped and sent in its
    own GET request to ``api.fanyi.baidu.com`` (``fromLang`` -> ``toLang``).
    The ``dst`` field of the first ``trans_result`` item is stored in the
    module-level ``results`` dict under ``"data"`` and that dict is written
    to ``filename`` as one JSON object per line.

    A failure for one entry (network error, malformed or error response) is
    printed and that entry is skipped; processing continues with the next
    one. A progress message is printed after every 1000 successful entries.

    Args:
        filename: Path of the output file. It is opened in ``'w+'`` mode, so
            any existing content is truncated.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained; hashlib replaces
    # the deprecated ``md5`` module and yields the same digest.
    import hashlib

    base_path = '/api/trans/vip/translate'
    num = 0
    with open(filename, 'w+') as f:
        for info in infos:
            q = info.strip()
            salt = random.randint(32768, 65536)
            # Request signature: MD5 hex digest of appid + query + salt + key.
            sign = hashlib.md5(appid + q + str(salt) + secretKey).hexdigest()
            myurl = (base_path
                     + '?appid=' + appid
                     + '&q=' + urllib.quote(q)
                     + '&from=' + fromLang
                     + '&to=' + toLang
                     + '&salt=' + str(salt)
                     + '&sign=' + sign)
            # Bind before ``try``: the name is local to this function, so the
            # ``finally`` clause would raise UnboundLocalError if the
            # connection object was never created.
            httpClient = None
            try:
                httpClient = httplib.HTTPConnection('api.fanyi.baidu.com')
                httpClient.request('GET', myurl)
                response = httpClient.getresponse()
                payload = json.loads(response.read())
                if 'trans_result' not in payload:
                    # Surface the whole reply instead of a bare KeyError so
                    # the reason for the failure is visible in the output.
                    raise ValueError(
                        'no trans_result in response: {!r}'.format(payload))
                result = payload['trans_result'][0]['dst'].encode('utf-8')
                results["data"] = result
                line = json.dumps(results)
                f.write(line + '\n')
                num += 1
                if num % 1000 == 0:
                    print('success {}'.format(num))
            except Exception as e:
                # Best effort: report the failure and move on to the next entry.
                print(e)
            finally:
                if httpClient:
                    httpClient.close()

if __name__ == "__main__":
    # Script entry point: translate everything in `infos` and write the
    # JSON-lines output to trans.json in the current working directory.
    make_file('trans.json')
翻译质量并不算高,而且没办法保持翻译之前的数据分割格式。不要钱,凑合着用吧。
参考: