|
@@ -12,6 +12,30 @@ import openai
|
|
|
|
|
|
blocks = ["p", "h1", "h2", "h3", "h4", "h5", "img", "li"]
|
|
|
|
|
|
def href_rus_to_bg(db, cur, pattern_bg, text):
    """Point links to Russian posts at their Bulgarian translations.

    Every ``https://adminkin.com/<post_name>`` URL found in *text* is looked
    up in the database; when a Bulgarian translation exists the URL becomes
    ``https://adminkin.com/bg/<bg_post_name>``.  URLs without a translation
    and all text outside the matched URLs are left untouched.

    Args:
        db: Database connection.  Not used here; kept so existing callers
            keep working.
        cur: DB-API cursor used for the lookups.
        pattern_bg: Regular expression whose group 1 captures the numeric ID
            of the Bulgarian post inside a ``post_translations`` description.
        text: Post content to rewrite.

    Returns:
        The content with translatable links rewritten.  Empty or ``None``
        input is returned unchanged.
    """
    if not text:
        return text

    # The slug stops at the first character that cannot belong to a URL path
    # segment, so links written without a trailing slash are handled too.
    url_re = re.compile(r'(https://adminkin\.com/)([^/"\'\s<>?#]+)')

    # A link repeated in the content costs a single database round trip.
    cache = {}

    def _swap(match):
        rus_postname = match.group(2)
        if rus_postname not in cache:
            cache[rus_postname] = _lookup_bg_postname(
                cur, pattern_bg, rus_postname)
        bg_postname = cache[rus_postname]
        if bg_postname is None:
            return match.group(0)
        return "%sbg/%s" % (match.group(1), bg_postname)

    # One pass with a callback: only the matched URL is rewritten, and text
    # that has already been replaced is never scanned a second time.
    return url_re.sub(_swap, text)


def _lookup_bg_postname(cur, pattern_bg, rus_postname):
    """Return the post_name of the Bulgarian translation, or None if absent."""
    # NOTE(review): the ``%s`` placeholders assume a driver with the
    # "format"/"pyformat" paramstyle (MySQLdb, PyMySQL, mysql-connector);
    # confirm against the connection set up in the main script.
    # ``=`` instead of ``LIKE`` so that ``%`` and ``_`` in a slug are matched
    # literally rather than as wildcards.
    cur.execute(
        """
        SELECT wp_term_taxonomy.taxonomy, wp_term_taxonomy.description
        FROM wp_posts
        LEFT JOIN wp_term_relationships
            ON wp_posts.ID = wp_term_relationships.object_id
        LEFT JOIN wp_term_taxonomy
            ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id
        WHERE wp_posts.post_name = %s
        """,
        (rus_postname,),
    )
    # fetchall() materialises the rows, so reusing the cursor for the second
    # query below does not disturb this loop.
    for taxonomy, description in cur.fetchall():
        if taxonomy != 'post_translations':
            continue
        found = re.search(pattern_bg, description)
        if not found:
            continue
        bg_postid = int(found.group(1))
        cur.execute("SELECT post_name FROM wp_posts WHERE ID = %s",
                    (bg_postid,))
        bg_row = cur.fetchone()
        # Only the first matching translation row is considered.
        return None if bg_row is None else bg_row[0]
    return None
|
|
|
+
|
|
|
def generate_response(prompt):
|
|
|
openai.api_key = "OPENAI_API_KEY"
|
|
|
prompt = "Требуется только перевод с русского языка на болгарский язык. Если в оригинале есть английские слова, то они должны присутствовать в переводе. Если в оригинале встречается html разметка, то вставлять её в соответствующих местах перевода. Никакие пояснения к переводу не требуются. Переведи: \"" + prompt + "\""
|
|
@@ -25,7 +49,12 @@ def generate_response(prompt):
|
|
|
)
|
|
|
|
|
|
if 'choices' in response and len(response.choices) > 0:
|
|
|
- return response.choices[0].message.content
|
|
|
+ text = response.choices[0].message.content
|
|
|
+ if text.startswith('"'):
|
|
|
+ text = text[1:]
|
|
|
+ if text.endswith('"'):
|
|
|
+ text = text[:-1]
|
|
|
+ return text
|
|
|
else:
|
|
|
return None
|
|
|
|
|
@@ -98,7 +127,8 @@ if __name__ == "__main__":
|
|
|
cur.execute(sql)
|
|
|
row = cur.fetchone()
|
|
|
if row is not None:
|
|
|
- soup = BeautifulSoup(row[4], features="lxml")
|
|
|
+ text = href_rus_to_bg(db, cur, pattern_bg, row[4])
|
|
|
+ soup = BeautifulSoup(text, features="lxml")
|
|
|
extracted_blocks = _extract_blocks(soup.body)
|
|
|
post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))
|
|
|
|
|
@@ -112,7 +142,7 @@ if __name__ == "__main__":
|
|
|
|
|
|
post_name = transliterate.translit(post_title, 'ru', reversed=True)
|
|
|
post_name = post_name.lower()
|
|
|
- post_name = re.sub(r'[^a-z,0-9,-, ]', '', post_name)
|
|
|
+ post_name = re.sub(r'[^a-z0-9\- ]', '', post_name)
|
|
|
post_name = re.sub(r' ', '-', post_name)
|
|
|
|
|
|
guid = ""
|