admin管理员组文章数量:1130349
下载了一些英文PDF文章,想把它们翻译一下文件名,并且以英文名_翻译中文重命名。
调用百度翻译api,在百度翻译开放平台注册通用翻译API-标准版,是免费的。
http://fanyi-api.baidu.com/api/trans/product/desktop
申请时,"服务器地址"一栏我填了本机的 IP,结果调用时会报错:
| 58000 | 客户端IP非法 | 检查个人资料里填写的 IP地址 是否正确,可前往管理控制平台修改 |
我修改成空后,demo就不报错了,所以服务器地址最好不填。
申请好后,拉到网页最下面,记录以下信息:
申请信息
APP ID:
密钥:
#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
import requests
import random
import json
from hashlib import md5
import time
# Set your own appid/appkey for the Baidu "General Translation API - Standard Edition".
appid = 'INPUT_YOUR_APPID'
appkey = 'INPUT_YOUR_APPKEY'
# For list of language codes, please refer to `https://api.fanyi.baidu.com/doc/21`
from_lang = 'en'
to_lang = 'zh'
# Host of the Baidu translation open API; the full hostname (with ".com") is
# required, otherwise the request cannot be resolved.
endpoint = 'http://api.fanyi.baidu.com'
path = '/api/trans/vip/translate'
# Complete request URL used by the translation loop.
url = endpoint + path
#query = 'Dynamic hardware system for cascade SVM classification of melanoma'
# Generate salt and sign
def make_md5(s, encoding='utf-8'):
    """Return the hexadecimal MD5 digest of the string *s*.

    Args:
        s: Text to hash.
        encoding: Codec used to convert *s* to bytes before hashing.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    return md5(s.encode(encoding)).hexdigest()
# One salt is generated up front and reused for every request of this run.
salt = random.randint(32768, 65536)
# Build request
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
# Put the PDF files in the ./1/ folder next to main.py.
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))
# Captures the file name without its directory and extension.
# Compiled once here instead of on every loop iteration.
pattern = re.compile(r'([^<>/\\\|:""\*\?]+)\.\w+$')
# Characters that must not appear in a file name (Windows rules); a translation
# containing one of them would otherwise make os.rename fail.
illegal_chars = re.compile(r'[<>:"/\\|?*]')
for i, frame in enumerate(frames):
    data = pattern.findall(frame)
    data_now = data[0]
    print(data_now)
    query = data_now
    # Request signature: MD5 of appid + query + salt + appkey.
    sign = make_md5(appid + query + str(salt) + appkey)
    payload = {'appid': appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}
    # Send request
    r = requests.post(url, params=payload, headers=headers)
    result = r.json()
    if 'trans_result' in result:
        result1 = result['trans_result'][0]['dst']
        # Show response
        print(result1)
        safe_name = illegal_chars.sub('_', result1)
        # Rename "<english>.pdf" to "<english>_<translation>.pdf".
        os.rename(video_path + data_now + ".pdf", video_path + data_now + "_" + safe_name + ".pdf")
    else:
        # Surface the API error instead of silently skipping the file.
        print(result)
    # Wait 2-3 s between requests, otherwise the API answers with error 54003
    # (access frequency limited).
    time.sleep(3)
有大神写成了软件,https://www.52pojie/thread-1482332-1-1.html 还是相当好用的
·········································································································
期间也试了下抓包分析有道翻译,百度翻译,逆向解析sign,
超详细百度翻译js逆向(token 和 sign)_小生听雨园的博客-CSDN博客
JS逆向必会基础案例 | 百度翻译参数破解_shine4869的博客-CSDN博客
GitHub - Xuenew/Python_Spider_All: 每完成一个项目存储一个 欢迎添加
能用是能用,但是发现翻译字数有限,超出就会报 {"errno":998,"errmsg":"\u672a\u77e5\u9519\u8bef"}(即"未知错误")。
·············································································································
还试了金山词霸翻译,但它有反爬机制,signature 参数自己也不会逆向,所以放弃。
http://www.iciba.com/
#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))
# Captures the file name without its directory and extension.
# Compiled once here instead of on every loop iteration.
pattern = re.compile(r'([^<>/\\\|:""\*\?]+)\.\w+$')
# Characters that must not appear in a file name (Windows rules).
illegal_chars = re.compile(r'[<>:"/\\|?*]')
for i, frame in enumerate(frames):
    data = pattern.findall(frame)
    data_now = data[0]
    print(data_now)
    # URL-encode the file name so it can be used as the query word.
    url_code_name = urllib.parse.quote(data_now)
    print(url_code_name)
    html = requests.get("http://www.iciba.com/word?w=" + url_code_name, headers=headers)
    u = html.content.decode('utf-8')
    print(u)
    # The text of the first <p> element is taken as the translation.
    name = re.findall(r'<p>(.*?)</p>', u)
    print(name)
    if not name:
        # No <p> element in the response: leave this file untouched rather
        # than crash with an IndexError.
        continue
    safe_name = illegal_chars.sub('_', name[0])
    os.rename(video_path + data_now + ".pdf", video_path + data_now + safe_name + ".pdf")
金山词霸get请求运行了一会后,现在浏览器直接访问都打不开网页了。方案pass。
下载了一些英文PDF文章,想把它们翻译一下文件名,并且以英文名_翻译中文重命名。
调用百度翻译api,在百度翻译开放平台注册通用翻译API-标准版,是免费的。
http://fanyi-api.baidu.com/api/trans/product/desktop
申请时,"服务器地址"一栏我填了本机的 IP,结果调用时会报错:
| 58000 | 客户端IP非法 | 检查个人资料里填写的 IP地址 是否正确,可前往管理控制平台修改 |
我修改成空后,demo就不报错了,所以服务器地址最好不填。
申请好后,拉到网页最下面,记录以下信息:
申请信息
APP ID:
密钥:
#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
import requests
import random
import json
from hashlib import md5
import time
# Set your own appid/appkey.输入你自己的通用翻译api-标准版
appid = 'INPUT_YOUR_APPID'
appkey = 'INPUT_YOUR_APPKEY'
# For list of language codes, please refer to `https://api.fanyi.baidu.com/doc/21`
from_lang = 'en'
to_lang = 'zh'
# Host of the Baidu translation open API; the full hostname (with ".com") is
# required, otherwise the request cannot be resolved.
endpoint = 'http://api.fanyi.baidu.com'
path = '/api/trans/vip/translate'
# Complete request URL used by the translation loop.
url = endpoint + path
#query = 'Dynamic hardware system for cascade SVM classification of melanoma'
# Generate salt and sign
def make_md5(s, encoding='utf-8'):
    """Return the hexadecimal MD5 digest of the string *s*.

    Args:
        s: Text to hash.
        encoding: Codec used to convert *s* to bytes before hashing.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    return md5(s.encode(encoding)).hexdigest()
# One salt is generated up front and reused for every request of this run.
salt = random.randint(32768, 65536)
# Build request
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
# Put the PDF files in the ./1/ folder next to main.py.
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))
# Captures the file name without its directory and extension.
# Compiled once here instead of on every loop iteration.
pattern = re.compile(r'([^<>/\\\|:""\*\?]+)\.\w+$')
# Characters that must not appear in a file name (Windows rules); a translation
# containing one of them would otherwise make os.rename fail.
illegal_chars = re.compile(r'[<>:"/\\|?*]')
for i, frame in enumerate(frames):
    data = pattern.findall(frame)
    data_now = data[0]
    print(data_now)
    query = data_now
    # Request signature: MD5 of appid + query + salt + appkey.
    sign = make_md5(appid + query + str(salt) + appkey)
    payload = {'appid': appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}
    # Send request
    r = requests.post(url, params=payload, headers=headers)
    result = r.json()
    if 'trans_result' in result:
        result1 = result['trans_result'][0]['dst']
        # Show response
        print(result1)
        safe_name = illegal_chars.sub('_', result1)
        # Rename "<english>.pdf" to "<english>_<translation>.pdf".
        os.rename(video_path + data_now + ".pdf", video_path + data_now + "_" + safe_name + ".pdf")
    else:
        # Surface the API error instead of silently skipping the file.
        print(result)
    # Wait 2-3 s between requests, otherwise the API answers with error 54003
    # (access frequency limited).
    time.sleep(3)
有大神写成了软件,https://www.52pojie/thread-1482332-1-1.html 还是相当好用的
·········································································································
期间也试了下抓包分析有道翻译,百度翻译,逆向解析sign,
超详细百度翻译js逆向(token 和 sign)_小生听雨园的博客-CSDN博客
JS逆向必会基础案例 | 百度翻译参数破解_shine4869的博客-CSDN博客
GitHub - Xuenew/Python_Spider_All: 每完成一个项目存储一个 欢迎添加
能用是能用,但是发现翻译字数有限,超出就会报 {"errno":998,"errmsg":"\u672a\u77e5\u9519\u8bef"}(即"未知错误")。
·············································································································
还试了金山词霸翻译,但它有反爬机制,signature 参数自己也不会逆向,所以放弃。
http://www.iciba.com/
#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))
# Captures the file name without its directory and extension.
# Compiled once here instead of on every loop iteration.
pattern = re.compile(r'([^<>/\\\|:""\*\?]+)\.\w+$')
# Characters that must not appear in a file name (Windows rules).
illegal_chars = re.compile(r'[<>:"/\\|?*]')
for i, frame in enumerate(frames):
    data = pattern.findall(frame)
    data_now = data[0]
    print(data_now)
    # URL-encode the file name so it can be used as the query word.
    url_code_name = urllib.parse.quote(data_now)
    print(url_code_name)
    html = requests.get("http://www.iciba.com/word?w=" + url_code_name, headers=headers)
    u = html.content.decode('utf-8')
    print(u)
    # The text of the first <p> element is taken as the translation.
    name = re.findall(r'<p>(.*?)</p>', u)
    print(name)
    if not name:
        # No <p> element in the response: leave this file untouched rather
        # than crash with an IndexError.
        continue
    safe_name = illegal_chars.sub('_', name[0])
    os.rename(video_path + data_now + ".pdf", video_path + data_now + safe_name + ".pdf")
金山词霸get请求运行了一会后,现在浏览器直接访问都打不开网页了。方案pass。
版权声明:本文标题:基于python英文文件名批量翻译并重命名 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://it.en369.cn/jiaocheng/1764014779a2979523.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。


发表评论