#!/usr/bin/python3
# coding: utf-8
"""Translate one published WordPress post from Russian to Bulgarian.

Each run picks the oldest published post that is not yet listed in
``wp_posts_translated``, translates its content, title and selected
postmeta through the OpenAI chat API, inserts the translated copy together
with its taxonomy links and postmeta, and finally records the source post
in ``wp_posts_translated`` so it is not picked again.
"""

from contextlib import closing
import re
import sys
import time

from bs4 import BeautifulSoup
from bs4.element import Tag
import MySQLdb
import openai
import transliterate

# Tag names whose content is sent for translation as one unit.
blocks = ["p", "h1", "h2", "h3", "h4", "h5", "img", "li"]

# Seconds to wait after every translation request.
REQUEST_PAUSE_SECONDS = 25

# URL template used for the new post's guid and for `_original_post` meta.
POST_URL_TEMPLATE = "https://adminkin.com/?p=%i"

# term_taxonomy_id written for the `_yoast_wpseo_primary_category` meta when
# no translated category is found for the source post.
DEFAULT_CATEGORY_ID_BG = 107

# term_taxonomy_id linked to the new post for every 'language' relationship
# of the source post.
LANGUAGE_ID_BG = 105

# Name and slug stored in wp_terms for each new translation-group term.
TRANSLATION_GROUP_TERM = 'pll_6111111111111'

# Extracts the "bg" id from a term_translations description string.
PATTERN_BG = re.compile(r's:2:"bg";i:(\d+);')


def _connect():
    """Open a new MySQL connection using the script's configured settings."""
    return MySQLdb.connect(
        host="DB_SERVER",
        user="DB_USER",
        passwd="DB_PASSWORD",
        db="DB_NAME",
        charset='DB_CHARSET',
    )


def generate_response(prompt):
    """Ask the chat model to translate *prompt* from Russian to Bulgarian.

    The fixed instruction (in Russian) asks for the translation only and
    for any HTML markup to be kept in the corresponding places.

    Returns the text of the first choice, or ``None`` when the response
    contains no choices.
    """
    openai.api_key = "OPENAI_API_KEY"
    instruction = "Интересует только перевод с русского языка на болгарский язык. Если в оригинале встречается html разметка, то вставлять ее в соответствующих местах перевода. Если перевод совпадает с оригиналом, то вывести только перевод, пояснения о совпадении оригинала и перевода не требуются. Переведи: \"" + prompt + "\""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": instruction}],
        temperature=0.3,
        n=1,
        stop=None,
    )
    if 'choices' in response and len(response.choices) > 0:
        return response.choices[0].message.content
    return None


def remove_tag(string):
    """Strip one leading and one trailing ``<...>`` tag from *string*."""
    pattern = r'^<[^>]+>|<[^>]+>$'
    return re.sub(pattern, '', string)


def insert_into_table_postmeta(post_id, meta_key, meta_value):
    """Insert one row into ``wp_postmeta`` on a dedicated connection.

    Values are passed as query parameters, so callers must supply them raw
    (not pre-escaped). The connection is closed even if the insert fails.
    """
    with closing(_connect()) as db:
        cur = db.cursor()
        cur.execute(
            "INSERT INTO wp_postmeta(meta_id, post_id, meta_key, meta_value) "
            "VALUES (NULL, %s, %s, %s)",
            (post_id, meta_key, meta_value),
        )
        db.commit()


def insert_into_table_postsTranslated(ID, post_modified):
    """Record a source post in ``wp_posts_translated`` on a dedicated connection."""
    with closing(_connect()) as db:
        cur = db.cursor()
        cur.execute(
            "INSERT INTO wp_posts_translated(ID, post_modified) VALUES (%s, %s)",
            (ID, post_modified),
        )
        db.commit()


def _translate_or_exit(text):
    """Translate *text*; exit the script if the API returns no choices.

    Sleeps ``REQUEST_PAUSE_SECONDS`` after a successful request.
    """
    response = generate_response(text)
    if response is None:
        sys.exit()
    time.sleep(REQUEST_PAUSE_SECONDS)
    return response


def _extract_blocks(parent_tag) -> list:
    """Translate every translatable block under *parent_tag* in place.

    Direct children whose tag name is listed in ``blocks`` are translated:
    for ``img`` only a non-empty ``alt`` attribute is replaced; for every
    other block the inner markup is replaced by the parsed translation.
    Other tags are searched recursively.

    Returns the list of block tags that were visited (translated or not).
    """
    extracted_blocks = []
    # soup.body is None when the parser produced no <body> (empty content).
    if parent_tag is None:
        return extracted_blocks

    for tag in parent_tag:
        if tag.name in blocks:
            if tag.name == "img":
                # Images carry no inner text: translate the alt text only.
                # An image without alt text is left untouched; sending it
                # through the generic branch would translate an empty
                # string and append the reply inside the <img> element.
                alt_text = tag.attrs.get("alt")
                if alt_text:
                    tag["alt"] = _translate_or_exit(alt_text)
            else:
                translated = _translate_or_exit(remove_tag(str(tag)))
                tag.clear()
                tag.append(BeautifulSoup(translated, 'html.parser'))
            extracted_blocks.append(tag)
            continue
        if isinstance(tag, Tag) and tag.contents:
            extracted_blocks.extend(_extract_blocks(tag))
    return extracted_blocks


def _make_slug(title):
    """Build the post slug: transliterate, lowercase, keep [a-z0-9 -], spaces to '-'."""
    slug = transliterate.translit(title, 'ru', reversed=True).lower()
    # The hyphen sits last in the class so it is a literal, not a range.
    slug = re.sub(r'[^a-z0-9 -]', '', slug)
    return re.sub(r' ', '-', slug)


def _fetch_next_untranslated_post():
    """Return the oldest published, not yet translated ``wp_posts`` row, or None."""
    with closing(_connect()) as db:
        cur = db.cursor()
        cur.execute(
            """
            SELECT * FROM wp_posts
            WHERE (post_status='publish') AND (post_type='post')
              AND (ID NOT IN (SELECT ID FROM wp_posts_translated))
            ORDER BY ID ASC LIMIT 1;
            """
        )
        return cur.fetchone()


def _last_insert_id(cur):
    """Return LAST_INSERT_ID() for the cursor's connection."""
    cur.execute("SELECT LAST_INSERT_ID();")
    result = cur.fetchone()
    if result is None:
        raise RuntimeError("LAST_INSERT_ID() returned no row")
    return result[0]


def _insert_translated_post(db, cur, row, post_content, post_title, post_name):
    """Insert the translated copy of *row* into ``wp_posts``; return its ID.

    Columns that are not translated are copied positionally from *row*
    (a ``SELECT *`` result) in the order of the column list below. The
    guid is filled in afterwards because it contains the new ID.
    """
    cur.execute(
        """
        INSERT INTO wp_posts(ID, post_author, post_date, post_date_gmt,
            post_content, post_title, post_excerpt, post_status,
            comment_status, ping_status, post_password, post_name, to_ping,
            pinged, post_modified, post_modified_gmt, post_content_filtered,
            post_parent, guid, menu_order, post_type, post_mime_type,
            comment_count)
        VALUES (NULL, %s, NOW(), UTC_TIMESTAMP(),
            %s, %s, %s, %s,
            %s, %s, %s, %s, %s,
            %s, NOW(), UTC_TIMESTAMP(), %s,
            %s, %s, %s, %s, %s,
            0)
        """,
        (
            row[1],
            post_content, post_title, row[6], row[7],
            row[8], row[9], row[10], post_name, row[12],
            row[13], row[16],
            row[17], "", row[19], row[20], row[21],
        ),
    )
    db.commit()

    new_post_id = _last_insert_id(cur)
    cur.execute(
        "UPDATE wp_posts SET guid=%s WHERE ID=%s",
        (POST_URL_TEMPLATE % new_post_id, new_post_id),
    )
    db.commit()
    return new_post_id


def _insert_translation_group(db, cur, new_post_id, source_post_id):
    """Create the ``post_translations`` taxonomy row and its term; return its id.

    The row is inserted with term_id 0 and then updated so that term_id
    equals the generated term_taxonomy_id; a ``wp_terms`` row with that
    same id is inserted afterwards.
    """
    description = 'a:2:{s:2:"bg";i:%i;s:2:"ru";i:%i;}' % (new_post_id, source_post_id)
    cur.execute(
        """
        INSERT INTO wp_term_taxonomy(term_taxonomy_id, term_id, taxonomy,
            description, parent, count)
        VALUES (NULL, 0, 'post_translations', %s, 0, 2)
        """,
        (description,),
    )
    db.commit()

    group_id = _last_insert_id(cur)
    cur.execute(
        "UPDATE wp_term_taxonomy SET term_id=%s WHERE term_taxonomy_id=%s",
        (group_id, group_id),
    )
    db.commit()

    cur.execute(
        "INSERT INTO wp_terms(term_id, name, slug, term_group) VALUES (%s, %s, %s, 0)",
        (group_id, TRANSLATION_GROUP_TERM, TRANSLATION_GROUP_TERM),
    )
    db.commit()
    return group_id


def _find_bg_term_id(translation_groups, ru_term_taxonomy_id):
    """Return the "bg" id paired with the given "ru" id, or None.

    *translation_groups* are ``wp_term_taxonomy`` rows; index 3 is read as
    the description holding the serialized language-to-id pairs.
    """
    pattern_ru = r's:2:"ru";i:%i;' % ru_term_taxonomy_id
    for group in translation_groups:
        description = group[3]
        if re.search(pattern_ru, description):
            match = PATTERN_BG.search(description)
            if match:
                return int(match.group(1))
    return None


def _link_term(db, cur, object_id, term_taxonomy_id, term_order):
    """Attach a term to a post and bump that term's usage count."""
    cur.execute(
        "INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order) "
        "VALUES (%s, %s, %s)",
        (object_id, term_taxonomy_id, term_order),
    )
    db.commit()
    cur.execute(
        "UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id=%s",
        (term_taxonomy_id,),
    )
    db.commit()


def _copy_term_relationships(db, cur, source_post_id, new_post_id, group_id):
    """Mirror the source post's term links onto the new post.

    Categories and tags are mapped to their "bg" counterparts, 'language'
    links are replaced by ``LANGUAGE_ID_BG``, and both posts are attached
    to the translation group *group_id*.

    Returns the "bg" id of the last category mapped, or
    ``DEFAULT_CATEGORY_ID_BG`` when none was mapped.
    """
    category_id_bg = DEFAULT_CATEGORY_ID_BG

    cur.execute(
        """
        SELECT wp_term_relationships.*, wp_term_taxonomy.taxonomy
        FROM wp_term_relationships
        LEFT JOIN wp_term_taxonomy
          ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id
        WHERE wp_term_relationships.object_id=%s
        """,
        (source_post_id,),
    )
    relationships = cur.fetchall()

    # The translation groups do not change while relationships are copied,
    # so they are loaded once instead of once per relationship.
    cur.execute("SELECT * FROM wp_term_taxonomy WHERE taxonomy LIKE 'term_translations'")
    translation_groups = cur.fetchall()

    for relationship in relationships:
        term_taxonomy_id = relationship[1]
        term_order = relationship[2]
        taxonomy = relationship[3]

        if taxonomy in ('category', 'post_tag'):
            id_bg = _find_bg_term_id(translation_groups, term_taxonomy_id)
            if id_bg is None:
                continue
            _link_term(db, cur, new_post_id, id_bg, term_order)
            if taxonomy == 'category':
                category_id_bg = id_bg
        elif taxonomy == 'language':
            _link_term(db, cur, new_post_id, LANGUAGE_ID_BG, term_order)

    # Attach both the translation and the source post to the group term.
    for post_id in (new_post_id, source_post_id):
        cur.execute(
            "INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order) "
            "VALUES (%s, %s, 0)",
            (post_id, group_id),
        )
        db.commit()

    return category_id_bg


def _copy_postmeta(cur, source_post_id, new_post_id, category_id_bg):
    """Copy the source post's meta rows to the new post.

    The two Yoast text fields are translated (and skipped when the API
    returns no choices), `_original_post` and
    `_yoast_wpseo_primary_category` are rewritten, and everything else is
    copied unchanged.
    """
    cur.execute("SELECT * FROM wp_postmeta WHERE post_id = %s", (source_post_id,))
    for meta in cur.fetchall():
        meta_key = meta[2]
        source_value = meta[3]

        if meta_key in ('_yoast_wpseo_focuskw', '_yoast_wpseo_title'):
            response = generate_response(source_value)
            # Pause after the request whether or not it produced a result.
            time.sleep(REQUEST_PAUSE_SECONDS)
            if response is None:
                continue
            meta_value = response
        elif meta_key == '_original_post':
            meta_value = POST_URL_TEMPLATE % new_post_id
        elif meta_key == '_yoast_wpseo_primary_category':
            meta_value = "%i" % category_id_bg
        else:
            meta_value = source_value

        insert_into_table_postmeta(new_post_id, meta_key, meta_value)


def main():
    """Translate the next untranslated post, if there is one."""
    row = _fetch_next_untranslated_post()
    if row is None:
        return
    source_post_id = row[0]

    # Translate the content in place; row[4] is used as the post content
    # and row[5] as the post title.
    soup = BeautifulSoup(row[4], features="lxml")
    _extract_blocks(soup.body)
    # The lxml parser wraps a fragment in <html><body>; strip that wrapper
    # so only the post markup itself is stored.
    post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))

    post_title = _translate_or_exit(row[5])
    post_name = _make_slug(post_title)

    # Translation can take a long time, so the writes use a fresh
    # connection opened only once the translated data is ready.
    with closing(_connect()) as db:
        cur = db.cursor()
        new_post_id = _insert_translated_post(
            db, cur, row, post_content, post_title, post_name
        )
        group_id = _insert_translation_group(db, cur, new_post_id, source_post_id)
        category_id_bg = _copy_term_relationships(
            db, cur, source_post_id, new_post_id, group_id
        )
        _copy_postmeta(cur, source_post_id, new_post_id, category_id_bg)

        # Mark the source post as done; row[14] is stored as post_modified.
        insert_into_table_postsTranslated(source_post_id, row[14])


if __name__ == "__main__":
    main()