# py -3
# -*- coding: utf8 -*-
"""
Ebook Processor. Part of ebook build chain, along with WordCleaner7.

Functions decorated with ``@CmdLine(<letter>)`` are handed to the ``betools``
``CmdLine`` helper, and ``CmdLine.run()`` is invoked when this file is run as
a script (see ``betools`` for the exact command-line syntax).
"""
from pathlib import Path
import pprint
import os, sys, re, shutil, time
from itertools import chain
from sortedcontainers import SortedSet
from betools import CmdLine, visitDir, ruler, head

ebookName = "onjava"
rootPath = Path(r"C:\Users\Bruce\Dropbox\___OnJava")
docm = rootPath / "OnJava.docm"
ebookBuildPath = rootPath / "ebook_build"
html = ebookBuildPath / (ebookName + ".html")
ebookResources = rootPath / "ebook_resources"
css = ebookResources / (ebookName + ".css")
# Materialized as a list: a bare glob() generator is exhausted after its first
# iteration, so a second pass in the same process would silently copy nothing.
fonts = list(ebookResources.glob("ubuntumono-*"))
# Plain-text markers written around each code block by rewrite_code_blocks()
# and substituted again by cleanup_stripped_html().
start_code_tag = '[$code$]'
end_code_tag = '[$end_code$]'


@CmdLine('s')
def show_all_code_tags():
    """
    Shows all html "Code" tag variations used in book.

    Reads the HTML file at ``html``, collects every distinct ``<...>`` tag
    into a sorted set, and prints the ones whose text contains "Code".
    """
    tag = re.compile("<.*?>")
    with html.open(encoding="utf8") as ht:
        tags = SortedSet(tag.findall(ht.read()))
    for t in tags:
        if "Code" in t:
            print(t)


# Number of code blocks rewritten so far; incremented by rewrite_code_blocks()
# and never reset, so it accumulates across calls within one process.
count = 0


@CmdLine('r')
def rewrite_code_blocks():
    """
    Find contiguous blocks of code lines and wrap each block in
    ``start_code_tag`` / ``end_code_tag`` markers.

    Reads ``html``, rewrites every match of the block pattern, writes the
    result to a sibling file named ``<stem>-2.html`` and prints the running
    block count.
    """
    # Raw strings keep ``\s`` from being treated as an (invalid) string escape;
    # the resulting pattern text is unchanged.
    # NOTE(review): as written these patterns match only on blank lines; they
    # look like they once contained HTML tag text -- confirm against the
    # intended markup before relying on them.
    codeblock = re.compile(r'''(

.*?

\s*)+''', re.DOTALL)
    codeline = re.compile(r'''

(.*?)

\s*''', re.DOTALL)

    def rewrite_code_line(matchobj):
        # Keep the captured line text, minus trailing whitespace.
        # NOTE(review): the appended terminator is a bare newline here;
        # confirm no markup is missing from it.
        return matchobj.group(1).rstrip() + "\n"

    def rewrite_code_block(matchobj):
        global count
        count += 1
        return start_code_tag + \
            codeline.sub(rewrite_code_line, matchobj.group(0)) + \
            "\n" + end_code_tag + "\n"

    with html.open(encoding="utf8") as ht:
        rewritten = codeblock.sub(rewrite_code_block, ht.read())
    # Output goes to a sibling "-2" file instead of overwriting the input.
    with html.with_name(html.stem + "-2.html").open('w', encoding="utf8") as ht:
        ht.write(rewritten)
    print(count)


# NOTE(review): the three literals below are empty or whitespace-only, which
# suggests their markup content is missing -- confirm the intended text.
style = """ """

blank_table_row = """\
"""

fixed_table_row = """\
"""


@CmdLine('c')
def cleanup_stripped_html():
    """
    Clean up stripped HTML -- final housekeeping

    Reads ``<stem>-2.html``, applies each (search, replacement) pair from the
    fix table in order, writes the result to ``<stem>-3.html`` and copies the
    files listed in ``fonts`` into ``ebookBuildPath``.
    """
    # NOTE(review): several entries have empty search or replacement text,
    # which looks like lost markup -- confirm the intended values.
    fixes = [
        (start_code_tag, "\n\n"),
        (end_code_tag, "\n\n"),
        ("", style),
        ('', '\n\n'),
        (blank_table_row, fixed_table_row),
    ]
    with html.with_name(html.stem + "-2.html").open(encoding="utf8") as ht:
        doc = ht.read()
    for old, new in fixes:
        if not old:
            # str.replace() with an empty search string inserts the
            # replacement between every character of the document, which
            # would wreck the output; skip such entries.
            continue
        doc = doc.replace(old, new)
    with html.with_name(html.stem + "-3.html").open('w', encoding="utf8") as ht:
        ht.write(doc)
    for font in fonts:
        shutil.copy(str(font), str(ebookBuildPath))


def copy_resources():
    """
    Copy resources into book build directory

    Currently copies only the stylesheet at ``css``.
    """
    shutil.copy(str(css), str(ebookBuildPath))


@CmdLine('f')
def fresh_start():
    """
    Recreate ebookBuildPath and run the full conversion pipeline.

    Deletes and recreates the build directory, copies the source document
    into it, changes the working directory there, runs ``WordCleaner7``,
    lists and rewrites the code blocks, runs ``WordCleaner7`` again and
    finally applies cleanup_stripped_html().
    """
    print("Cleaning ...")
    if ebookBuildPath.exists():
        shutil.rmtree(str(ebookBuildPath))
        # Presumably gives the OS time to finish removing the tree before
        # the directory is recreated -- TODO confirm this is still needed.
        time.sleep(1)
    ebookBuildPath.mkdir()
    shutil.copy(str(docm), str(ebookBuildPath))
    os.chdir(str(ebookBuildPath))
    print("Convert to HTML")
    os.system('''WordCleaner7''')
    show_all_code_tags()
    rewrite_code_blocks()
    print("TEST Clean up existing HTML and remove formatting")
    os.system('''WordCleaner7''')
    cleanup_stripped_html()


if __name__ == '__main__':
    CmdLine.run()