admin管理员组

文章数量:1130349

下载了一些英文PDF文章,想把它们的文件名翻译成中文,并按“英文名_中文译名”的格式重命名。

 

调用百度翻译api,在百度翻译开放平台注册通用翻译API-标准版,是免费的。

http://fanyi-api.baidu.com/api/trans/product/desktop

申请时有一栏“服务器地址”,我填了本机的 IP,结果调用时会报错:

58000 客户端IP非法:检查个人资料里填写的 IP地址 是否正确,可前往管理控制平台修改

我修改成空后,demo就不报错了,所以服务器地址最好不填。

申请好后,拉到网址页面最下面,记录

申请信息

APP ID:

密钥:

#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
import requests
import random
import json
from hashlib import md5
import time
# Set your own appid/appkey (Baidu general translation API, standard edition).
appid = 'INPUT_YOUR_APPID'
appkey = 'INPUT_YOUR_APPKEY'

# For list of language codes, please refer to `https://api.fanyi.baidu.com/doc/21`
from_lang = 'en'
to_lang = 'zh'

# Full host name of the Baidu translation API plus the translate path;
# `url` is the address every translation request is posted to.
endpoint = 'http://api.fanyi.baidu.com'
path = '/api/trans/vip/translate'
url = endpoint + path

#query = 'Dynamic hardware system for cascade SVM classification of melanoma'

# Generate salt and sign
def make_md5(s, encoding='utf-8'):
    """Return the hexadecimal MD5 digest of the string *s*.

    The string is encoded with *encoding* (UTF-8 by default) before hashing.
    """
    hasher = md5()
    hasher.update(s.encode(encoding))
    return hasher.hexdigest()

# The salt is drawn once and reused for every request of this run; it is
# mixed into the request signature below.
salt = random.randint(32768, 65536)


# Build request
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
# Put the PDF articles in the ./1/ folder next to main.py.
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))

# Characters that must not end up in the new file name. The translated text
# comes from the remote service, so it is cleaned before renaming.
_illegal_name_chars = re.compile(r'[<>/\\|:"*?]')

for frame in frames:
    # Split "<folder>/<stem><ext>" with os.path so that any valid file name
    # works, including names containing characters such as '?' or ':'.
    folder, filename = os.path.split(frame)
    data_now, ext = os.path.splitext(filename)
    print(data_now)
    query = data_now
    sign = make_md5(appid + query + str(salt) + appkey)
    payload = {'appid': appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}
    # Send request; a network failure or a non-JSON reply skips this file
    # instead of aborting the whole batch.
    try:
        r = requests.post(url, params=payload, headers=headers, timeout=10)
        result = r.json()
    except (requests.RequestException, ValueError) as err:
        print(err)
        result = {}
    if 'trans_result' in result:
        result1 = result['trans_result'][0]['dst']
        # Show response
        print(result1)
        safe_name = _illegal_name_chars.sub('_', result1)
        os.rename(frame, os.path.join(folder, data_now + "_" + safe_name + ext))
    elif result:
        # No translation in the reply: show what the service returned so the
        # failure is visible rather than silently ignored.
        print(result)
    # Wait 2-3 s between requests, otherwise the API answers with error 54003
    # (access frequency limited).
    time.sleep(3)

有大神写成了软件,https://www.52pojie.cn/thread-1482332-1-1.html  还是相当好用的

·········································································································

期间也试了下抓包分析有道翻译、百度翻译的网页接口,逆向解析 sign,参考了下面几篇文章:

超详细百度翻译js逆向(token 和 sign)_小生听雨园的博客-CSDN博客

JS逆向必会基础案例 | 百度翻译参数破解_shine4869的博客-CSDN博客

GitHub - Xuenew/Python_Spider_All: 每完成一个项目存储一个 欢迎添加

能用是能用,但是发现可翻译的字数有限,超出就会报错:{"errno":998,"errmsg":"\u672a\u77e5\u9519\u8bef"}(errmsg 解码后为“未知错误”)。

·············································································································

还试了金山词霸翻译,但它有反爬机制,其中的 sign(签名)参数自己也不会逆向,所以放弃。

http://www.iciba.com/

#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
# Browser-like User-Agent sent with every request.
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# Folder holding the PDF files to rename.
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))

# Captures the text of every <p>...</p> element in the returned page.
_paragraph_pattern = re.compile(r'<p>(.*?)</p>')
# Characters that must not end up in the new file name; the scraped text is
# cleaned with this before renaming.
_illegal_name_chars = re.compile(r'[<>/\\|:"*?]')

for frame in frames:
    # Split "<folder>/<stem><ext>" with os.path so that any valid file name
    # works, including names containing characters such as '?' or ':'.
    folder, filename = os.path.split(frame)
    data_now, ext = os.path.splitext(filename)
    print(data_now)
    url_code_name = urllib.parse.quote(data_now)
    print(url_code_name)
    html = requests.get("http://www.iciba.com/word?w="+url_code_name,headers=headers)
    u = html.content.decode('utf-8')
    print(u)
    name = _paragraph_pattern.findall(u)
    print(name)
    if not name:
        # The page contained no <p> element, so there is nothing to append;
        # leave this file untouched instead of failing on name[0].
        continue
    safe_name = _illegal_name_chars.sub('_', name[0])
    os.rename(frame, os.path.join(folder, data_now + safe_name + ext))

金山词霸get请求运行了一会后,现在浏览器直接访问都打不开网页了。方案pass。

下载了一些英文PDF文章,想把它们的文件名翻译成中文,并按“英文名_中文译名”的格式重命名。

 

调用百度翻译api,在百度翻译开放平台注册通用翻译API-标准版,是免费的。

http://fanyi-api.baidu.com/api/trans/product/desktop

申请时有一栏“服务器地址”,我填了本机的 IP,结果调用时会报错:

58000 客户端IP非法:检查个人资料里填写的 IP地址 是否正确,可前往管理控制平台修改

我修改成空后,demo就不报错了,所以服务器地址最好不填。

申请好后,拉到网址页面最下面,记录

申请信息

APP ID:

密钥:

#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
import requests
import random
import json
from hashlib import md5
import time
# Set your own appid/appkey (Baidu general translation API, standard edition).
appid = 'INPUT_YOUR_APPID'
appkey = 'INPUT_YOUR_APPKEY'

# For list of language codes, please refer to `https://api.fanyi.baidu.com/doc/21`
from_lang = 'en'
to_lang = 'zh'

# Full host name of the Baidu translation API plus the translate path;
# `url` is the address every translation request is posted to.
endpoint = 'http://api.fanyi.baidu.com'
path = '/api/trans/vip/translate'
url = endpoint + path

#query = 'Dynamic hardware system for cascade SVM classification of melanoma'

# Generate salt and sign
def make_md5(s, encoding='utf-8'):
    """Return the hexadecimal MD5 digest of the string *s*.

    The string is encoded with *encoding* (UTF-8 by default) before hashing.
    """
    hasher = md5()
    hasher.update(s.encode(encoding))
    return hasher.hexdigest()

# The salt is drawn once and reused for every request of this run; it is
# mixed into the request signature below.
salt = random.randint(32768, 65536)


# Build request
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
# Put the PDF articles in the ./1/ folder next to main.py.
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))

# Characters that must not end up in the new file name. The translated text
# comes from the remote service, so it is cleaned before renaming.
_illegal_name_chars = re.compile(r'[<>/\\|:"*?]')

for frame in frames:
    # Split "<folder>/<stem><ext>" with os.path so that any valid file name
    # works, including names containing characters such as '?' or ':'.
    folder, filename = os.path.split(frame)
    data_now, ext = os.path.splitext(filename)
    print(data_now)
    query = data_now
    sign = make_md5(appid + query + str(salt) + appkey)
    payload = {'appid': appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}
    # Send request; a network failure or a non-JSON reply skips this file
    # instead of aborting the whole batch.
    try:
        r = requests.post(url, params=payload, headers=headers, timeout=10)
        result = r.json()
    except (requests.RequestException, ValueError) as err:
        print(err)
        result = {}
    if 'trans_result' in result:
        result1 = result['trans_result'][0]['dst']
        # Show response
        print(result1)
        safe_name = _illegal_name_chars.sub('_', result1)
        os.rename(frame, os.path.join(folder, data_now + "_" + safe_name + ext))
    elif result:
        # No translation in the reply: show what the service returned so the
        # failure is visible rather than silently ignored.
        print(result)
    # Wait 2-3 s between requests, otherwise the API answers with error 54003
    # (access frequency limited).
    time.sleep(3)

有大神写成了软件,https://www.52pojie.cn/thread-1482332-1-1.html  还是相当好用的

·········································································································

期间也试了下抓包分析有道翻译、百度翻译的网页接口,逆向解析 sign,参考了下面几篇文章:

超详细百度翻译js逆向(token 和 sign)_小生听雨园的博客-CSDN博客

JS逆向必会基础案例 | 百度翻译参数破解_shine4869的博客-CSDN博客

GitHub - Xuenew/Python_Spider_All: 每完成一个项目存储一个 欢迎添加

能用是能用,但是发现可翻译的字数有限,超出就会报错:{"errno":998,"errmsg":"\u672a\u77e5\u9519\u8bef"}(errmsg 解码后为“未知错误”)。

·············································································································

还试了金山词霸翻译,但它有反爬机制,其中的 sign(签名)参数自己也不会逆向,所以放弃。

http://www.iciba.com/

#-*- coding:UTF-8 -*-
import cv2
import os
import re
import numpy as np
from glob import glob
import requests
import urllib.request
import time
import urllib
import urllib.parse
import random
# Browser-like User-Agent sent with every request.
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# Folder holding the PDF files to rename.
video_path = "./1/"
frames = glob(os.path.join(video_path, '*.pdf'))

# Captures the text of every <p>...</p> element in the returned page.
_paragraph_pattern = re.compile(r'<p>(.*?)</p>')
# Characters that must not end up in the new file name; the scraped text is
# cleaned with this before renaming.
_illegal_name_chars = re.compile(r'[<>/\\|:"*?]')

for frame in frames:
    # Split "<folder>/<stem><ext>" with os.path so that any valid file name
    # works, including names containing characters such as '?' or ':'.
    folder, filename = os.path.split(frame)
    data_now, ext = os.path.splitext(filename)
    print(data_now)
    url_code_name = urllib.parse.quote(data_now)
    print(url_code_name)
    html = requests.get("http://www.iciba.com/word?w="+url_code_name,headers=headers)
    u = html.content.decode('utf-8')
    print(u)
    name = _paragraph_pattern.findall(u)
    print(name)
    if not name:
        # The page contained no <p> element, so there is nothing to append;
        # leave this file untouched instead of failing on name[0].
        continue
    safe_name = _illegal_name_chars.sub('_', name[0])
    os.rename(frame, os.path.join(folder, data_now + safe_name + ext))

金山词霸get请求运行了一会后,现在浏览器直接访问都打不开网页了。方案pass。

本文标签: 英文文件名批量python