#!/usr/bin/python3
# coding: utf-8
"""Translate one published WordPress post from Russian to Bulgarian.

Each run picks the oldest published post that is not yet listed in
``wp_posts_translated``, translates its content, title and selected post meta
through the OpenAI chat API, stores the result as a new post and links both
posts through ``post_translations`` / ``term_translations`` taxonomy rows.
"""

import re
import sys
import time
from contextlib import closing

import MySQLdb
import openai
import transliterate
from bs4 import BeautifulSoup
from bs4.element import Tag

# Tags whose markup is sent to the translator as a single unit.
blocks = ["p", "h1", "h2", "h3", "h4", "h5", "img", "li"]

# Pause, in seconds, made after every translation request.
REQUEST_PAUSE_SECONDS = 25


def _connect():
    """Open a new connection to the WordPress database."""
    return MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD",
                           db="DB_NAME", charset='DB_CHARSET')


def _last_insert_id(cur):
    """Return the connection's LAST_INSERT_ID(), or None if no row came back."""
    cur.execute("SELECT LAST_INSERT_ID();")
    fetched = cur.fetchone()
    return None if fetched is None else fetched[0]


def href_rus_to_bg(db, cur, pattern_bg, text):
    """Rewrite links to Russian posts so they point at the Bulgarian versions.

    Args:
        db: Open database connection (unused, kept for interface compatibility).
        cur: Cursor used for the lookups.
        pattern_bg: Regex whose group 1 captures the Bulgarian post ID inside a
            ``post_translations`` description.
        text: Post content to scan for ``https://adminkin.com/<slug>`` links.

    Returns:
        ``text`` with every translated slug replaced by ``bg/<bulgarian slug>``.
    """
    pattern_url = r'https://adminkin.com/([^/]+)'
    # Handle each distinct slug once: re-running the replacement for a repeated
    # slug could rewrite text that has already been replaced.
    for rus_postname in dict.fromkeys(re.findall(pattern_url, text)):
        cur.execute(
            """
            SELECT wp_term_taxonomy.taxonomy, wp_term_taxonomy.description
            FROM wp_posts
            LEFT JOIN wp_term_relationships
                ON wp_posts.ID=wp_term_relationships.object_id
            LEFT JOIN wp_term_taxonomy
                ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id
            WHERE wp_posts.post_name LIKE %s;
            """,
            (rus_postname,),
        )
        for taxonomy, description in cur.fetchall():
            if taxonomy != 'post_translations':
                continue
            search_result = re.search(pattern_bg, description)
            if search_result:
                bg_postid = int(search_result.group(1))
                cur.execute("SELECT post_name FROM wp_posts WHERE ID = %s", (bg_postid,))
                row2 = cur.fetchone()
                if row2 is not None:
                    # Literal replacement: the slug comes from page content and
                    # must not be interpreted as a regular expression.
                    text = text.replace(rus_postname, "bg/%s" % row2[0])
            # Only the first post_translations row is consulted.
            break
    return text


def generate_response(prompt):
    """Ask the chat model for a Russian -> Bulgarian translation of ``prompt``.

    The instruction text and the quoted source text are sent together as one
    "system" message. One pair of surrounding double quotes is stripped from
    the reply.

    Returns:
        The translated text, or None when the response carries no choices.
    """
    openai.api_key = "OPENAI_API_KEY"
    # NOTE(review): the collapsed source had a line break inside this literal;
    # it is read here as the single space before "Переведи".
    prompt = (
        "Требуется только перевод с русского языка на болгарский язык. "
        "Если в оригинале есть английские слова, то они должны присутствовать в переводе. "
        "Если в оригинале встречается html разметка, то вставлять её в соответствующих местах перевода. "
        "Никакие пояснения к переводу не требуются. "
        "Переведи: \"" + prompt + "\""
    )
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": prompt}],
        temperature=0.3,
        n=1,
        stop=None,
    )
    if 'choices' in response and len(response.choices) > 0:
        text = response.choices[0].message.content
        if text.startswith('"'):
            text = text[1:]
        if text.endswith('"'):
            text = text[:-1]
        return text
    return None


def _translate_or_exit(text):
    """Translate ``text``, pause, and abort the script if no translation came back."""
    response = generate_response(text)
    if response is None:
        # Exit with an error status so a failed run is distinguishable from success.
        sys.exit("Translation request returned no choices; aborting.")
    time.sleep(REQUEST_PAUSE_SECONDS)
    return response


def remove_tag(string):
    """Strip one tag from the very start and one from the very end of ``string``."""
    pattern = r'^<[^>]+>|<[^>]+>$'
    return re.sub(pattern, '', string)


def insert_into_table_postmeta(post_id, meta_key, meta_value):
    """Insert one ``wp_postmeta`` row over a dedicated, short-lived connection.

    Values are passed as query parameters, so ``meta_value`` must be the raw
    (unescaped) value.
    """
    with closing(_connect()) as db:
        cur = db.cursor()
        cur.execute(
            """
            INSERT INTO wp_postmeta(meta_id, post_id, meta_key, meta_value)
            VALUES (NULL, %s, %s, %s)
            """,
            (post_id, meta_key, meta_value),
        )
        db.commit()


def insert_into_table_postsTranslated(ID, post_modified):
    """Record ``ID`` in ``wp_posts_translated`` so the post is not picked again."""
    with closing(_connect()) as db:
        cur = db.cursor()
        cur.execute(
            """
            INSERT INTO wp_posts_translated(ID, post_modified)
            VALUES (%s, %s)
            """,
            (ID, post_modified),
        )
        db.commit()


def _extract_blocks(parent_tag) -> list:
    """Translate, in place, every block-level tag found under ``parent_tag``.

    Tags named in ``blocks`` are translated as a whole (for ``img`` only a
    non-empty ``alt`` attribute is translated); other tags are searched
    recursively.

    Returns:
        The list of block tags that were visited, in document order.
    """
    extracted_blocks = []
    for tag in parent_tag:
        if tag.name in blocks:
            if tag.name == "img":
                # An image carries no text apart from its alt attribute; without
                # one there is nothing to translate.
                if tag.get("alt"):
                    tag["alt"] = _translate_or_exit(tag["alt"])
            else:
                inner_html = remove_tag(str(tag))
                # Skip empty blocks instead of sending an empty prompt.
                if inner_html.strip():
                    translated = BeautifulSoup(_translate_or_exit(inner_html), 'html.parser')
                    tag.clear()
                    tag.append(translated)
            extracted_blocks.append(tag)
            continue
        if isinstance(tag, Tag) and tag.contents:
            extracted_blocks.extend(_extract_blocks(tag))
    return extracted_blocks


def _slugify(title):
    """Build a post slug: transliterate, lowercase, drop other chars, hyphenate spaces."""
    post_name = transliterate.translit(title, 'ru', reversed=True).lower()
    post_name = re.sub(r'[^a-z0-9\- ]', '', post_name)
    return re.sub(r' ', '-', post_name)


def _copy_term_relationships(db, cur, pattern_bg, source_post_id, new_post_id):
    """Attach the new post to the Bulgarian counterparts of the source post's terms.

    Returns:
        The Bulgarian category term_taxonomy_id to use as primary category
        (107 when no translated category was found).
    """
    category_id_bg = 107
    cur.execute(
        """
        SELECT wp_term_relationships.*, wp_term_taxonomy.taxonomy
        FROM wp_term_relationships
        LEFT JOIN wp_term_taxonomy
            ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id
        WHERE wp_term_relationships.object_id=%s
        """,
        (source_post_id,),
    )
    relationships = cur.fetchall()

    # The translation groups do not change while relationships are copied,
    # so they are loaded once instead of once per relationship.
    cur.execute("SELECT * FROM wp_term_taxonomy WHERE taxonomy LIKE 'term_translations'")
    translation_groups = cur.fetchall()

    for relationship in relationships:
        term_taxonomy_id, term_order, taxonomy = relationship[1], relationship[2], relationship[3]
        if taxonomy in ('category', 'post_tag'):
            pattern_ru = r's:2:"ru";i:%i;' % term_taxonomy_id
            for group in translation_groups:
                description = group[3]
                if not re.search(pattern_ru, description):
                    continue
                search_result = re.search(pattern_bg, description)
                if search_result:
                    id_bg = int(search_result.group(1))
                    cur.execute(
                        """
                        INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
                        VALUES (%s, %s, %s)
                        """,
                        (new_post_id, id_bg, term_order),
                    )
                    db.commit()
                    cur.execute(
                        "UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id=%s",
                        (id_bg,),
                    )
                    db.commit()
                    if taxonomy == 'category':
                        category_id_bg = id_bg
                # Stop at the first group that mentions the Russian term.
                break
        elif taxonomy == 'language':
            language_id_bg = 105
            cur.execute(
                """
                INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
                VALUES (%s, %s, %s)
                """,
                (new_post_id, language_id_bg, term_order),
            )
            db.commit()
            cur.execute(
                "UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id=%s",
                (language_id_bg,),
            )
            db.commit()
    return category_id_bg


def _copy_postmeta(cur, source_post_id, new_post_id, category_id_bg):
    """Copy the source post's meta rows to the new post, translating SEO fields."""
    cur.execute("SELECT * FROM wp_postmeta WHERE post_id = %s", (source_post_id,))
    for tb_postmeta in cur.fetchall():
        meta_key, original_value = tb_postmeta[2], tb_postmeta[3]
        if (meta_key == '_yoast_wpseo_focuskw') or (
                (meta_key == '_yoast_wpseo_title') and (original_value != '%%title%%')):
            response = generate_response(original_value)
            time.sleep(REQUEST_PAUSE_SECONDS)
            if response is None:
                # Best effort: an untranslated SEO field is skipped, not fatal.
                continue
            meta_value = response
        elif meta_key == '_original_post':
            meta_value = "https://adminkin.com/?p=%i" % new_post_id
        elif meta_key == '_yoast_wpseo_primary_category':
            meta_value = "%i" % category_id_bg
        else:
            meta_value = original_value
        insert_into_table_postmeta(new_post_id, meta_key, meta_value)


def main():
    """Translate the next untranslated published post and store the result."""
    pattern_bg = r's:2:"bg";i:(\d+);'

    with closing(_connect()) as db:
        cur = db.cursor()
        cur.execute(
            """
            SELECT * FROM wp_posts
            WHERE (post_status='publish') AND (post_type='post')
                AND (ID NOT IN (SELECT ID FROM wp_posts_translated))
            ORDER BY ID ASC LIMIT 1;
            """
        )
        row = cur.fetchone()
        if row is None:
            return
        text = href_rus_to_bg(db, cur, pattern_bg, row[4])
    # The first connection is closed here; the original script also opened a
    # fresh connection after translating (presumably because the translation
    # phase is long enough for an idle connection to time out - TODO confirm).

    soup = BeautifulSoup(text, features="lxml")
    if soup.body is not None:  # empty content yields no <body> at all
        _extract_blocks(soup.body)
    # NOTE(review): the source pattern read r'(|)', which matches nothing useful;
    # it is assumed to have been the <html><body> wrapper that the lxml parser
    # adds around a fragment, which must not end up in post_content.
    post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))

    post_title = _translate_or_exit(row[5])
    post_name = _slugify(post_title)
    guid = ""

    with closing(_connect()) as db:
        cur = db.cursor()

        #####################
        # TABLE posts
        #####################
        cur.execute(
            """
            INSERT INTO wp_posts(ID, post_author, post_date, post_date_gmt, post_content,
                post_title, post_excerpt, post_status, comment_status, ping_status,
                post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt,
                post_content_filtered, post_parent, guid, menu_order, post_type,
                post_mime_type, comment_count)
            VALUES (NULL, %s, NOW(), UTC_TIMESTAMP(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                NOW(), UTC_TIMESTAMP(), %s, %s, %s, %s, %s, %s, 0)
            """,
            (row[1], post_content, post_title, row[6], row[7], row[8], row[9], row[10],
             post_name, row[12], row[13], row[16], row[17], guid, row[19], row[20], row[21]),
        )
        db.commit()
        new_post_id = _last_insert_id(cur)
        if new_post_id is None:
            return

        guid = "https://adminkin.com/?p=%i" % new_post_id
        cur.execute("UPDATE wp_posts SET guid=%s WHERE ID=%s", (guid, new_post_id))
        db.commit()

        ###############################
        # TABLE term_taxonomy
        ###############################
        description = 'a:2:{s:2:"bg";i:%i;s:2:"ru";i:%i;}' % (new_post_id, row[0])
        cur.execute(
            """
            INSERT INTO wp_term_taxonomy(term_taxonomy_id, term_id, taxonomy, description,
                parent, count)
            VALUES (NULL, 0, 'post_translations', %s, 0, 2)
            """,
            (description,),
        )
        db.commit()
        translations_term_id = _last_insert_id(cur)
        if translations_term_id is None:
            return

        cur.execute(
            "UPDATE wp_term_taxonomy SET term_id=%s WHERE term_taxonomy_id=%s",
            (translations_term_id, translations_term_id),
        )
        db.commit()
        cur.execute(
            """
            INSERT INTO wp_terms(term_id, name, slug, term_group)
            VALUES (%s, 'pll_6111111111111', 'pll_6111111111111', 0)
            """,
            (translations_term_id,),
        )
        db.commit()

        ################################
        # TABLE term_relationships
        ################################
        category_id_bg = _copy_term_relationships(db, cur, pattern_bg, row[0], new_post_id)

        # Link both the translation and the original to the post_translations term.
        for linked_post_id in (new_post_id, row[0]):
            cur.execute(
                """
                INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
                VALUES (%s, %s, 0)
                """,
                (linked_post_id, translations_term_id),
            )
            db.commit()

        ########################
        # TABLE postmeta
        ########################
        _copy_postmeta(cur, row[0], new_post_id, category_id_bg)

        ##############################
        # TABLE posts_translated
        ##############################
        insert_into_table_postsTranslated(row[0], row[14])
        insert_into_table_postsTranslated(new_post_id, row[14])


if __name__ == "__main__":
    main()