123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- #!/usr/bin/python3
- # coding: utf-8
- from bs4 import BeautifulSoup
- from bs4.element import Tag
- import MySQLdb
- import re
- import sys
- import time
- import transliterate
- import openai
# Tag names handled as one translatable unit: an element with one of these
# names is translated as a whole and is not descended into.
blocks = [
    "p",
    "h1", "h2", "h3", "h4", "h5",
    "img",
    "li",
]
def href_rus_to_bg(db, cur, pattern_bg, text):
    """Point internal links in *text* at the translated versions of the posts.

    Every ``https://adminkin.com/<slug>`` reference found in *text* is looked
    up in ``wp_posts``. When the post has a ``post_translations`` taxonomy row
    whose description matches *pattern_bg*, every occurrence of ``<slug>`` in
    *text* is replaced by ``bg/<post_name of the translated post>``.

    Args:
        db: Database connection. Not used here; kept so existing calls work.
        cur: DB-API cursor used for the lookups (its pending result set is
            overwritten by the queries issued here).
        pattern_bg: Regex whose group 1 captures the numeric ID of the
            translated post inside the ``post_translations`` description.
        text: Post content to rewrite.

    Returns:
        The rewritten content. Slugs without a translation are left as-is.
    """
    pattern_url = r'https://adminkin.com/([^/]+)'
    # The slug is bound as a parameter rather than pasted into the SQL text,
    # so quotes coming from the post content cannot break the statement.
    lookup_sql = """
        SELECT wp_term_taxonomy.taxonomy, wp_term_taxonomy.description FROM wp_posts
        LEFT JOIN wp_term_relationships ON wp_posts.ID=wp_term_relationships.object_id
        LEFT JOIN wp_term_taxonomy ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id
        WHERE wp_posts.post_name LIKE %s;
    """
    # Each distinct slug is handled once. A single replacement already
    # rewrites all of its occurrences, and a second pass would turn
    # "bg/<slug>" into "bg/bg/<slug>" whenever the translated slug still
    # contains the original one.
    for rus_postname in dict.fromkeys(re.findall(pattern_url, text)):
        cur.execute(lookup_sql, (rus_postname,))
        for taxonomy, description in cur.fetchall():
            if taxonomy != 'post_translations':
                continue
            search_result = re.search(pattern_bg, description)
            if search_result:
                bg_postid = int(search_result.group(1))
                cur.execute("SELECT post_name FROM wp_posts WHERE ID = %s", (bg_postid,))
                row2 = cur.fetchone()
                if row2 is not None:
                    bg_postname = "bg/%s" % row2[0]
                    # Literal replacement: the slug is plain text, not a
                    # regular expression, so characters such as "+" or "."
                    # must not be interpreted.
                    text = text.replace(rus_postname, bg_postname)
            # Only the first post_translations row of a post is considered.
            break
    return text
def generate_response(prompt):
    """Ask the OpenAI chat API to translate *prompt* from Russian to Bulgarian.

    The text is wrapped in a fixed Russian-language instruction and sent as a
    single ``system`` message to ``gpt-3.5-turbo``.

    Args:
        prompt: Text (optionally containing HTML markup) to translate.

    Returns:
        The content of the first choice, with one leading and one trailing
        double quote removed when present, or ``None`` when the reply
        carries no choices.
    """
    openai.api_key = "OPENAI_API_KEY"
    request_text = "Требуется только перевод с русского языка на болгарский язык. Если в оригинале есть английские слова, то они должны присутствовать в переводе. Если в оригинале встречается html разметка, то вставлять её в соответствующих местах перевода. Никакие пояснения к переводу не требуются. Переведи: \"" + prompt + "\""
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": request_text}],
        temperature=0.3,
        n=1,
        stop=None,
    )
    if 'choices' not in reply or len(reply.choices) < 1:
        return None

    translated = reply.choices[0].message.content
    # The request wraps the source text in quotes, so the reply may come
    # back wrapped the same way; peel one quote off each end.
    if translated[:1] == '"':
        translated = translated[1:]
    if translated[-1:] == '"':
        translated = translated[:-1]
    return translated
def remove_tag(string):
    """Return *string* without a markup tag at its very start and very end.

    A ``<...>`` run anchored at the beginning and a ``<...>`` run anchored at
    the end are both deleted; tags anywhere else are left untouched.
    """
    outer_tags = re.compile(r'^<[^>]+>|<[^>]+>$')
    return outer_tags.sub('', string)
def insert_into_table_postmeta(post_id, meta_key, meta_value):
    """Insert one row into ``wp_postmeta`` over a dedicated connection.

    The values are interpolated into the SQL text, so *meta_key* and
    *meta_value* must already be safe for a single-quoted MySQL literal.
    The caller in this file passes ``meta_value`` through
    ``MySQLdb.escape_string`` first, which is why it is not bound as a query
    parameter here: binding would escape it a second time.

    Args:
        post_id: Integer ID of the post the meta row belongs to.
        meta_key: Meta key name.
        meta_value: Pre-escaped meta value.
    """
    db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
    try:
        cur = db.cursor()
        sql = """
        INSERT INTO wp_postmeta(meta_id, post_id, meta_key, meta_value)
        VALUES (NULL,'%i','%s','%s')
        """ % (post_id, meta_key, meta_value)
        cur.execute(sql)
        db.commit()
    finally:
        # Release the connection even when the statement fails.
        db.close()
def insert_into_table_postsTranslated(ID, post_modified):
    """Record a post in ``wp_posts_translated`` over a dedicated connection.

    Args:
        ID: Post ID to record.
        post_modified: Modification timestamp stored alongside the ID.
    """
    db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
    try:
        cur = db.cursor()
        # Values are bound by the driver instead of being formatted into the
        # SQL text, so quoting and type conversion are handled in one place.
        sql = """
        INSERT INTO wp_posts_translated(ID, post_modified)
        VALUES (%s, %s)
        """
        cur.execute(sql, (ID, post_modified))
        db.commit()
    finally:
        # Release the connection even when the statement fails.
        db.close()
def _extract_blocks(parent_tag) -> list:
    """Translate the block elements below *parent_tag* in place.

    Children whose tag name is listed in ``blocks`` are translated as a unit:
    for ``img`` the ``alt`` attribute is replaced, for every other block the
    inner markup is replaced by the parsed translation. Other tags are
    searched recursively. The script exits when a translation request
    returns ``None``.

    Args:
        parent_tag: Element whose children are scanned; ``None`` is accepted
            and yields an empty list.

    Returns:
        The block elements encountered, in document order (including blocks
        that had nothing to translate).
    """
    extracted_blocks = []
    if parent_tag is None:
        return extracted_blocks

    for tag in parent_tag:
        if tag.name in blocks:
            if tag.name == "img":
                # An image has no inner markup; only its alt text can be
                # translated. Without this, an <img> lacking "alt" was sent
                # as an empty prompt and the reply was appended inside it.
                source_text = tag.attrs.get("alt")
            else:
                source_text = remove_tag(str(tag))

            # Skip blocks with nothing to translate instead of spending an
            # API call (and the pause after it) on an empty prompt.
            if source_text and source_text.strip():
                response = generate_response(source_text)
                if response is None:
                    sys.exit()
                if tag.name == "img":
                    tag["alt"] = response
                else:
                    tag.clear()
                    tag.append(BeautifulSoup(response, 'html.parser'))
                # Pause after each request (presumably API rate limiting).
                time.sleep(25)

            extracted_blocks.append(tag)
            continue

        if isinstance(tag, Tag) and tag.contents:
            extracted_blocks.extend(_extract_blocks(tag))
    return extracted_blocks
def _connect():
    """Open a new database connection with the script's connection settings."""
    return MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')


def _slugify(post_title):
    """Build the post_name: transliterate, lower-case, keep [a-z0-9- ], dash the spaces."""
    post_name = transliterate.translit(post_title, 'ru', reversed=True).lower()
    post_name = re.sub(r'[^a-z0-9\- ]', '', post_name)
    return re.sub(r' ', '-', post_name)


def _insert_translated_post(db, cur, row, post_content, post_title, post_name):
    """Insert the translated copy of *row* into wp_posts and return its ID (or None)."""
    # All values are bound as parameters. The source columns copied
    # verbatim (excerpt, status, ...) were previously pasted into the SQL
    # text unescaped, so a single quote in any of them broke the statement.
    sql = """
        INSERT INTO wp_posts(ID, post_author, post_date, post_date_gmt, post_content, post_title, post_excerpt, post_status, comment_status, ping_status, post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt, post_content_filtered, post_parent, guid, menu_order, post_type, post_mime_type, comment_count)
        VALUES (NULL,%s,NOW(),UTC_TIMESTAMP(),%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,NOW(),UTC_TIMESTAMP(),%s,%s,%s,%s,%s,%s,0)
    """
    guid = ""  # filled in below, once the new ID is known
    cur.execute(sql, (
        row[1], post_content, post_title, row[6], row[7], row[8], row[9],
        row[10], post_name, row[12], row[13], row[16], row[17], guid,
        row[19], row[20], row[21],
    ))
    db.commit()

    cur.execute("SELECT LAST_INSERT_ID();")
    inserted = cur.fetchone()
    if inserted is None:
        return None
    new_id = inserted[0]

    guid = "https://adminkin.com/?p=%i" % new_id
    cur.execute("UPDATE wp_posts SET guid=%s WHERE ID=%s", (guid, new_id))
    db.commit()
    return new_id


def _create_translation_group(db, cur, ru_post_id, bg_post_id):
    """Create the post_translations taxonomy row and its wp_terms row; return its ID (or None)."""
    description = 'a:2:{s:2:"bg";i:%i;s:2:"ru";i:%i;}' % (bg_post_id, ru_post_id)
    cur.execute(
        """
        INSERT INTO wp_term_taxonomy(term_taxonomy_id, term_id, taxonomy, description, parent, count)
        VALUES (NULL,0,'post_translations',%s,0,2)
        """,
        (description,),
    )
    db.commit()

    cur.execute("SELECT LAST_INSERT_ID();")
    inserted = cur.fetchone()
    if inserted is None:
        return None
    group_id = inserted[0]

    # The row is created with term_id 0 and then pointed at itself.
    cur.execute(
        "UPDATE wp_term_taxonomy SET term_id=%s WHERE term_taxonomy_id=%s",
        (group_id, group_id),
    )
    db.commit()
    cur.execute(
        """
        INSERT INTO wp_terms(term_id, name, slug, term_group)
        VALUES (%s,'pll_6111111111111','pll_6111111111111',0)
        """,
        (group_id,),
    )
    db.commit()
    return group_id


def _add_counted_relationship(db, cur, object_id, term_taxonomy_id, term_order):
    """Attach *object_id* to a term and increment that term's usage count."""
    cur.execute(
        """
        INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
        VALUES (%s,%s,%s)
        """,
        (object_id, term_taxonomy_id, term_order),
    )
    db.commit()
    cur.execute(
        "UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id=%s",
        (term_taxonomy_id,),
    )
    db.commit()


def _copy_term_relationships(db, cur, pattern_bg, ru_post_id, bg_post_id, group_id):
    """Mirror the source post's terms onto the translated post.

    Returns the ID to store as the translated post's primary category.
    """
    # Hard-coded fallback category (site-specific value - TODO confirm).
    category_id_bg = 107
    # Hard-coded term for the target language (site-specific - TODO confirm).
    language_id_bg = 105

    cur.execute(
        """
        SELECT wp_term_relationships.*, wp_term_taxonomy.taxonomy FROM wp_term_relationships LEFT JOIN wp_term_taxonomy
        ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id WHERE wp_term_relationships.object_id=%s
        """,
        (ru_post_id,),
    )
    relationships = cur.fetchall()

    # Loaded once, on first use: nothing in this loop changes the
    # descriptions being matched, so there is no need to re-query per term.
    term_translations = None

    for relationship in relationships:
        taxonomy = relationship[3]
        if taxonomy in ('category', 'post_tag'):
            if term_translations is None:
                cur.execute("SELECT * FROM wp_term_taxonomy WHERE taxonomy LIKE 'term_translations'")
                term_translations = cur.fetchall()
            pattern_ru = r's:2:"ru";i:%i;' % relationship[1]
            for res in term_translations:
                if not re.search(pattern_ru, res[3]):
                    continue
                bg_match = re.search(pattern_bg, res[3])
                if not bg_match:
                    continue
                id_bg = int(bg_match.group(1))
                _add_counted_relationship(db, cur, bg_post_id, id_bg, relationship[2])
                if taxonomy == 'category':
                    category_id_bg = id_bg
                break
        elif taxonomy == 'language':
            _add_counted_relationship(db, cur, bg_post_id, language_id_bg, relationship[2])

    # Both posts join the translation group. Its count was already set to 2
    # when the group row was created, so no count update is done here.
    for object_id in (bg_post_id, ru_post_id):
        cur.execute(
            """
            INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
            VALUES (%s,%s,0)
            """,
            (object_id, group_id),
        )
        db.commit()
    return category_id_bg


def _copy_postmeta(cur, ru_post_id, bg_post_id, category_id_bg):
    """Copy the source post's meta rows to the translated post, translating SEO fields."""
    cur.execute("SELECT * FROM wp_postmeta WHERE post_id = %s", (ru_post_id,))
    for tb_postmeta in cur.fetchall():
        meta_key = tb_postmeta[2]
        original_value = tb_postmeta[3]
        if (meta_key == '_yoast_wpseo_focuskw') or ((meta_key == '_yoast_wpseo_title') and (original_value != '%%title%%')):
            response = generate_response(original_value)
            # Pause after each request, whether or not it succeeded.
            time.sleep(25)
            if response is None:
                # A failed translation skips this meta row only.
                continue
            meta_value = MySQLdb.escape_string(response).decode('utf-8')
        elif meta_key == '_original_post':
            meta_value = "https://adminkin.com/?p=%i" % bg_post_id
        elif meta_key == '_yoast_wpseo_primary_category':
            meta_value = "%i" % category_id_bg
        else:
            meta_value = MySQLdb.escape_string(original_value).decode('utf-8')
        # insert_into_table_postmeta interpolates its arguments into the SQL
        # text, hence the escaping above.
        insert_into_table_postmeta(bg_post_id, meta_key, meta_value)


def main():
    """Translate the oldest published post that has not been translated yet.

    Picks one post missing from ``wp_posts_translated``, translates its
    content and title, inserts the translated copy together with its
    taxonomy links and meta rows, and records both posts as processed.
    """
    pattern_bg = r's:2:"bg";i:(\d+);'

    db = _connect()
    try:
        cur = db.cursor()
        cur.execute(
            """
            SELECT * FROM wp_posts WHERE (post_status='publish') AND (post_type='post') AND (ID NOT IN (SELECT ID FROM wp_posts_translated)) ORDER BY ID ASC LIMIT 1;
            """
        )
        row = cur.fetchone()
        if row is None:
            return
        text = href_rus_to_bg(db, cur, pattern_bg, row[4])
    finally:
        # Translation pauses 25 s after every request, so this connection is
        # released now rather than left idle; a fresh one is opened for the
        # writes. Previously it was overwritten without being closed.
        db.close()

    soup = BeautifulSoup(text, features="lxml")
    if soup.body is not None:
        _extract_blocks(soup.body)  # translates the parsed tree in place
    post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))

    post_title = generate_response(row[5])
    if post_title is None:
        sys.exit()
    time.sleep(25)
    post_name = _slugify(post_title)

    db = _connect()
    try:
        cur = db.cursor()

        # TABLE posts
        bg_post_id = _insert_translated_post(db, cur, row, post_content, post_title, post_name)
        if bg_post_id is None:
            return

        # TABLE term_taxonomy
        group_id = _create_translation_group(db, cur, row[0], bg_post_id)
        if group_id is None:
            return

        # TABLE term_relationships
        category_id_bg = _copy_term_relationships(db, cur, pattern_bg, row[0], bg_post_id, group_id)

        # TABLE postmeta
        _copy_postmeta(cur, row[0], bg_post_id, category_id_bg)

        # TABLE posts_translated
        insert_into_table_postsTranslated(row[0], row[14])
        insert_into_table_postsTranslated(bg_post_id, row[14])
    finally:
        db.close()


if __name__ == "__main__":
    main()
|