python
# Convert a Word (.docx) manuscript into a small HTML fragment.
#
# Bold and/or red runs are wrapped in <font>/<b> tags, paragraphs are joined
# with <br>, and the result is written to output.html.

import html
import re  # not used by this cell; kept in case other cells rely on it

from docx import Document
from docx.shared import RGBColor

# A run counts as "red" when its explicit RGB colour equals RED_RGB ...
RED_RGB = RGBColor(255, 0, 0)
# ... or when its theme colour compares equal to this raw index.
# NOTE(review): confirm which theme-colour slot index 10 corresponds to in the
# documents being converted; the value is carried over unchanged.
RED_THEME_COLOR = 10

# (is_bold, is_red) -> (opening markup, closing markup).
# Note that bold-only uses a CSS class while bold+red uses <b>; this mirrors
# the markup the notebook has always produced.
_STYLE_TAGS = {
    (True, True): ('<b><font color="#ff0000">', '</font></b>'),
    (True, False): ('<font class="k-bold">', '</font>'),
    (False, True): ('<font color="#ff0000">', '</font>'),
    (False, False): ('', ''),
}


def _is_red(run):
    """Return True when the run's font colour matches RED_RGB or RED_THEME_COLOR."""
    color = run.font.color
    return bool(color and (color.rgb == RED_RGB or color.theme_color == RED_THEME_COLOR))


def _render_runs(runs):
    """Render every run of one paragraph as HTML and concatenate the pieces.

    Run text is HTML-escaped before it is wrapped, so characters such as
    ``<``, ``>`` and ``&`` typed in the document show up literally instead of
    being interpreted as markup. Runs whose text is empty are skipped so they
    cannot emit empty ``<font></font>`` pairs.
    """
    pieces = []
    for run in runs:
        if not run.text:
            continue
        # run.bold may be None (no explicit setting); that is treated as "not bold".
        open_tag, close_tag = _STYLE_TAGS[(bool(run.bold), _is_red(run))]
        # quote=False: the text is element content, never an attribute value.
        pieces.append(f'{open_tag}{html.escape(run.text, quote=False)}{close_tag}')
    return ''.join(pieces)


def convert_to_html(doc_path):
    """Convert the paragraphs of a Word document into a single HTML string.

    Only ``doc.paragraphs`` is iterated. Each paragraph is rendered run by
    run, stripped of leading/trailing whitespace, and dropped if nothing is
    left. The surviving paragraphs are joined with ``<br>``, and any newline
    inside a run's text is likewise turned into ``<br>``.

    Args:
        doc_path: Passed straight to ``docx.Document``; normally the path of
            the .docx file to convert.

    Returns:
        The HTML fragment as a ``str``. It is the empty string when no
        paragraph contains any text.
    """
    doc = Document(doc_path)

    rendered_paragraphs = []
    for paragraph in doc.paragraphs:
        rendered = _render_runs(paragraph.runs).strip()
        if rendered:
            rendered_paragraphs.append(rendered)

    return '<br>'.join(rendered_paragraphs).replace('\n', '<br>')


# --- Example run -------------------------------------------------------------

# Raw string so the backslashes of the Windows path are taken literally
# (a plain 'C:\U...' literal is rejected by Python as a malformed \U escape).
doc_path = r'C:\Users\99999999\Downloads\原稿.docx'  # Word document to convert
html_content = convert_to_html(doc_path)

# Save the generated fragment as an HTML file.
with open('output.html', 'w', encoding='utf-8') as file:
    file.write(html_content)

print("変換が完了しました。output.htmlに保存されました。")

0 コメント