More fixes for Amazon books, fixing identity checks, started on Topaz.

pull/156/head
Apprentice Harper 4 years ago
parent dc27c36761
commit 939cdbb0c9

@@ -56,7 +56,7 @@ def readEncodedNumber(file):
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
data = c[0]
datax = (datax <<7) + (data & 0x7F)
data = datax
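
This hunk swaps ord(c) for c[0]: file.read(1) on a binary file returns bytes in Python 3, and indexing bytes yields an int directly. A minimal standalone sketch of the same 7-bit accumulation, assuming a high-bit continuation flag (the real readEncodedNumber handles more cases):

import io

def read_varint(stream):
    # sketch only: accumulate 7 bits per byte, assuming the high bit
    # of each byte signals that another byte follows
    datax = 0
    while True:
        c = stream.read(1)
        if len(c) == 0:
            return None
        data = c[0]                      # int in Python 3 (ord(c) also works on 1 byte)
        datax = (datax << 7) + (data & 0x7F)
        if (data & 0x80) == 0:
            return datax

print(read_varint(io.BytesIO(b'\x81\x01')))   # (1 << 7) + 1 = 129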
@@ -188,232 +188,232 @@ class PageParser(object):
# tag : (number of arguments, argument type, subtags present, special case of subtags present when escaped)
token_tags = {
'x' : (1, 'scalar_number', 0, 0),
'y' : (1, 'scalar_number', 0, 0),
'h' : (1, 'scalar_number', 0, 0),
'w' : (1, 'scalar_number', 0, 0),
'firstWord' : (1, 'scalar_number', 0, 0),
'lastWord' : (1, 'scalar_number', 0, 0),
'rootID' : (1, 'scalar_number', 0, 0),
'stemID' : (1, 'scalar_number', 0, 0),
'type' : (1, 'scalar_text', 0, 0),
'info' : (0, 'number', 1, 0),
'info.word' : (0, 'number', 1, 1),
'info.word.ocrText' : (1, 'text', 0, 0),
'info.word.firstGlyph' : (1, 'raw', 0, 0),
'info.word.lastGlyph' : (1, 'raw', 0, 0),
'info.word.bl' : (1, 'raw', 0, 0),
'info.word.link_id' : (1, 'number', 0, 0),
'glyph' : (0, 'number', 1, 1),
'glyph.x' : (1, 'number', 0, 0),
'glyph.y' : (1, 'number', 0, 0),
'glyph.glyphID' : (1, 'number', 0, 0),
'dehyphen' : (0, 'number', 1, 1),
'dehyphen.rootID' : (1, 'number', 0, 0),
'dehyphen.stemID' : (1, 'number', 0, 0),
'dehyphen.stemPage' : (1, 'number', 0, 0),
'dehyphen.sh' : (1, 'number', 0, 0),
'links' : (0, 'number', 1, 1),
'links.page' : (1, 'number', 0, 0),
'links.rel' : (1, 'number', 0, 0),
'links.row' : (1, 'number', 0, 0),
'links.title' : (1, 'text', 0, 0),
'links.href' : (1, 'text', 0, 0),
'links.type' : (1, 'text', 0, 0),
'links.id' : (1, 'number', 0, 0),
'paraCont' : (0, 'number', 1, 1),
'paraCont.rootID' : (1, 'number', 0, 0),
'paraCont.stemID' : (1, 'number', 0, 0),
'paraCont.stemPage' : (1, 'number', 0, 0),
'paraStems' : (0, 'number', 1, 1),
'paraStems.stemID' : (1, 'number', 0, 0),
'wordStems' : (0, 'number', 1, 1),
'wordStems.stemID' : (1, 'number', 0, 0),
'empty' : (1, 'snippets', 1, 0),
'page' : (1, 'snippets', 1, 0),
'page.class' : (1, 'scalar_text', 0, 0),
'page.pageid' : (1, 'scalar_text', 0, 0),
'page.pagelabel' : (1, 'scalar_text', 0, 0),
'page.type' : (1, 'scalar_text', 0, 0),
'page.h' : (1, 'scalar_number', 0, 0),
'page.w' : (1, 'scalar_number', 0, 0),
'page.startID' : (1, 'scalar_number', 0, 0),
'group' : (1, 'snippets', 1, 0),
'group.class' : (1, 'scalar_text', 0, 0),
'group.type' : (1, 'scalar_text', 0, 0),
'group._tag' : (1, 'scalar_text', 0, 0),
'group.orientation': (1, 'scalar_text', 0, 0),
'region' : (1, 'snippets', 1, 0),
'region.class' : (1, 'scalar_text', 0, 0),
'region.type' : (1, 'scalar_text', 0, 0),
'region.x' : (1, 'scalar_number', 0, 0),
'region.y' : (1, 'scalar_number', 0, 0),
'region.h' : (1, 'scalar_number', 0, 0),
'region.w' : (1, 'scalar_number', 0, 0),
'region.orientation' : (1, 'scalar_text', 0, 0),
'empty_text_region' : (1, 'snippets', 1, 0),
'img' : (1, 'snippets', 1, 0),
'img.x' : (1, 'scalar_number', 0, 0),
'img.y' : (1, 'scalar_number', 0, 0),
'img.h' : (1, 'scalar_number', 0, 0),
'img.w' : (1, 'scalar_number', 0, 0),
'img.src' : (1, 'scalar_number', 0, 0),
'img.color_src' : (1, 'scalar_number', 0, 0),
'img.gridSize' : (1, 'scalar_number', 0, 0),
'img.gridBottomCenter' : (1, 'scalar_number', 0, 0),
'img.gridTopCenter' : (1, 'scalar_number', 0, 0),
'img.gridBeginCenter' : (1, 'scalar_number', 0, 0),
'img.gridEndCenter' : (1, 'scalar_number', 0, 0),
'img.image_type' : (1, 'scalar_number', 0, 0),
'paragraph' : (1, 'snippets', 1, 0),
'paragraph.class' : (1, 'scalar_text', 0, 0),
'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
'paragraph.gridSize' : (1, 'scalar_number', 0, 0),
'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0),
'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0),
'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0),
'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0),
'word_semantic' : (1, 'snippets', 1, 1),
'word_semantic.type' : (1, 'scalar_text', 0, 0),
'word_semantic.class' : (1, 'scalar_text', 0, 0),
'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
'word_semantic.gridBottomCenter' : (1, 'scalar_number', 0, 0),
'word_semantic.gridTopCenter' : (1, 'scalar_number', 0, 0),
'word_semantic.gridBeginCenter' : (1, 'scalar_number', 0, 0),
'word_semantic.gridEndCenter' : (1, 'scalar_number', 0, 0),
'word' : (1, 'snippets', 1, 0),
'word.type' : (1, 'scalar_text', 0, 0),
'word.class' : (1, 'scalar_text', 0, 0),
'word.firstGlyph' : (1, 'scalar_number', 0, 0),
'word.lastGlyph' : (1, 'scalar_number', 0, 0),
'_span' : (1, 'snippets', 1, 0),
'_span.class' : (1, 'scalar_text', 0, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
'_span.lastWord' : (1, 'scalar_number', 0, 0),
'_span.gridSize' : (1, 'scalar_number', 0, 0),
'_span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
'_span.gridTopCenter' : (1, 'scalar_number', 0, 0),
'_span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
'_span.gridEndCenter' : (1, 'scalar_number', 0, 0),
'span' : (1, 'snippets', 1, 0),
'span.firstWord' : (1, 'scalar_number', 0, 0),
'span.lastWord' : (1, 'scalar_number', 0, 0),
'span.gridSize' : (1, 'scalar_number', 0, 0),
'span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
'span.gridTopCenter' : (1, 'scalar_number', 0, 0),
'span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
'span.gridEndCenter' : (1, 'scalar_number', 0, 0),
'extratokens' : (1, 'snippets', 1, 0),
'extratokens.class' : (1, 'scalar_text', 0, 0),
'extratokens.type' : (1, 'scalar_text', 0, 0),
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
'extratokens.gridSize' : (1, 'scalar_number', 0, 0),
'extratokens.gridBottomCenter' : (1, 'scalar_number', 0, 0),
'extratokens.gridTopCenter' : (1, 'scalar_number', 0, 0),
'extratokens.gridBeginCenter' : (1, 'scalar_number', 0, 0),
'extratokens.gridEndCenter' : (1, 'scalar_number', 0, 0),
'glyph.h' : (1, 'number', 0, 0),
'glyph.w' : (1, 'number', 0, 0),
'glyph.use' : (1, 'number', 0, 0),
'glyph.vtx' : (1, 'number', 0, 1),
'glyph.len' : (1, 'number', 0, 1),
'glyph.dpi' : (1, 'number', 0, 0),
'vtx' : (0, 'number', 1, 1),
'vtx.x' : (1, 'number', 0, 0),
'vtx.y' : (1, 'number', 0, 0),
'len' : (0, 'number', 1, 1),
'len.n' : (1, 'number', 0, 0),
'book' : (1, 'snippets', 1, 0),
'version' : (1, 'snippets', 1, 0),
'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
'version.Schema_id' : (1, 'scalar_text', 0, 0),
'version.Schema_version' : (1, 'scalar_text', 0, 0),
'version.Topaz_version' : (1, 'scalar_text', 0, 0),
'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
'version.chapterheaders' : (1, 'scalar_text', 0, 0),
'version.creation_date' : (1, 'scalar_text', 0, 0),
'version.header_footer' : (1, 'scalar_text', 0, 0),
'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
'version.letter_insertion' : (1, 'scalar_text', 0, 0),
'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
'version.findlists' : (1, 'scalar_text', 0, 0),
'version.page_num' : (1, 'scalar_text', 0, 0),
'version.page_type' : (1, 'scalar_text', 0, 0),
'version.bad_text' : (1, 'scalar_text', 0, 0),
'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
'version.margins' : (1, 'scalar_text', 0, 0),
'version.staggered_lines' : (1, 'scalar_text', 0, 0),
'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
'version.toc' : (1, 'scalar_text', 0, 0),
'stylesheet' : (1, 'snippets', 1, 0),
'style' : (1, 'snippets', 1, 0),
'style._tag' : (1, 'scalar_text', 0, 0),
'style.type' : (1, 'scalar_text', 0, 0),
'style._after_type' : (1, 'scalar_text', 0, 0),
'style._parent_type' : (1, 'scalar_text', 0, 0),
'style._after_parent_type' : (1, 'scalar_text', 0, 0),
'style.class' : (1, 'scalar_text', 0, 0),
'style._after_class' : (1, 'scalar_text', 0, 0),
'rule' : (1, 'snippets', 1, 0),
'rule.attr' : (1, 'scalar_text', 0, 0),
'rule.value' : (1, 'scalar_text', 0, 0),
'original' : (0, 'number', 1, 1),
'original.pnum' : (1, 'number', 0, 0),
'original.pid' : (1, 'text', 0, 0),
'pages' : (0, 'number', 1, 1),
'pages.ref' : (1, 'number', 0, 0),
'pages.id' : (1, 'number', 0, 0),
'startID' : (0, 'number', 1, 1),
'startID.page' : (1, 'number', 0, 0),
'startID.id' : (1, 'number', 0, 0),
'median_d' : (1, 'number', 0, 0),
'median_h' : (1, 'number', 0, 0),
'median_firsty' : (1, 'number', 0, 0),
'median_lasty' : (1, 'number', 0, 0),
'num_footers_maybe' : (1, 'number', 0, 0),
'num_footers_yes' : (1, 'number', 0, 0),
'num_headers_maybe' : (1, 'number', 0, 0),
'num_headers_yes' : (1, 'number', 0, 0),
'tracking' : (1, 'number', 0, 0),
'src' : (1, 'text', 0, 0),
b'x' : (1, 'scalar_number', 0, 0),
b'y' : (1, 'scalar_number', 0, 0),
b'h' : (1, 'scalar_number', 0, 0),
b'w' : (1, 'scalar_number', 0, 0),
b'firstWord' : (1, 'scalar_number', 0, 0),
b'lastWord' : (1, 'scalar_number', 0, 0),
b'rootID' : (1, 'scalar_number', 0, 0),
b'stemID' : (1, 'scalar_number', 0, 0),
b'type' : (1, 'scalar_text', 0, 0),
b'info' : (0, 'number', 1, 0),
b'info.word' : (0, 'number', 1, 1),
b'info.word.ocrText' : (1, 'text', 0, 0),
b'info.word.firstGlyph' : (1, 'raw', 0, 0),
b'info.word.lastGlyph' : (1, 'raw', 0, 0),
b'info.word.bl' : (1, 'raw', 0, 0),
b'info.word.link_id' : (1, 'number', 0, 0),
b'glyph' : (0, 'number', 1, 1),
b'glyph.x' : (1, 'number', 0, 0),
b'glyph.y' : (1, 'number', 0, 0),
b'glyph.glyphID' : (1, 'number', 0, 0),
b'dehyphen' : (0, 'number', 1, 1),
b'dehyphen.rootID' : (1, 'number', 0, 0),
b'dehyphen.stemID' : (1, 'number', 0, 0),
b'dehyphen.stemPage' : (1, 'number', 0, 0),
b'dehyphen.sh' : (1, 'number', 0, 0),
b'links' : (0, 'number', 1, 1),
b'links.page' : (1, 'number', 0, 0),
b'links.rel' : (1, 'number', 0, 0),
b'links.row' : (1, 'number', 0, 0),
b'links.title' : (1, 'text', 0, 0),
b'links.href' : (1, 'text', 0, 0),
b'links.type' : (1, 'text', 0, 0),
b'links.id' : (1, 'number', 0, 0),
b'paraCont' : (0, 'number', 1, 1),
b'paraCont.rootID' : (1, 'number', 0, 0),
b'paraCont.stemID' : (1, 'number', 0, 0),
b'paraCont.stemPage' : (1, 'number', 0, 0),
b'paraStems' : (0, 'number', 1, 1),
b'paraStems.stemID' : (1, 'number', 0, 0),
b'wordStems' : (0, 'number', 1, 1),
b'wordStems.stemID' : (1, 'number', 0, 0),
b'empty' : (1, 'snippets', 1, 0),
b'page' : (1, 'snippets', 1, 0),
b'page.class' : (1, 'scalar_text', 0, 0),
b'page.pageid' : (1, 'scalar_text', 0, 0),
b'page.pagelabel' : (1, 'scalar_text', 0, 0),
b'page.type' : (1, 'scalar_text', 0, 0),
b'page.h' : (1, 'scalar_number', 0, 0),
b'page.w' : (1, 'scalar_number', 0, 0),
b'page.startID' : (1, 'scalar_number', 0, 0),
b'group' : (1, 'snippets', 1, 0),
b'group.class' : (1, 'scalar_text', 0, 0),
b'group.type' : (1, 'scalar_text', 0, 0),
b'group._tag' : (1, 'scalar_text', 0, 0),
b'group.orientation': (1, 'scalar_text', 0, 0),
b'region' : (1, 'snippets', 1, 0),
b'region.class' : (1, 'scalar_text', 0, 0),
b'region.type' : (1, 'scalar_text', 0, 0),
b'region.x' : (1, 'scalar_number', 0, 0),
b'region.y' : (1, 'scalar_number', 0, 0),
b'region.h' : (1, 'scalar_number', 0, 0),
b'region.w' : (1, 'scalar_number', 0, 0),
b'region.orientation' : (1, 'scalar_text', 0, 0),
b'empty_text_region' : (1, 'snippets', 1, 0),
b'img' : (1, 'snippets', 1, 0),
b'img.x' : (1, 'scalar_number', 0, 0),
b'img.y' : (1, 'scalar_number', 0, 0),
b'img.h' : (1, 'scalar_number', 0, 0),
b'img.w' : (1, 'scalar_number', 0, 0),
b'img.src' : (1, 'scalar_number', 0, 0),
b'img.color_src' : (1, 'scalar_number', 0, 0),
b'img.gridSize' : (1, 'scalar_number', 0, 0),
b'img.gridBottomCenter' : (1, 'scalar_number', 0, 0),
b'img.gridTopCenter' : (1, 'scalar_number', 0, 0),
b'img.gridBeginCenter' : (1, 'scalar_number', 0, 0),
b'img.gridEndCenter' : (1, 'scalar_number', 0, 0),
b'img.image_type' : (1, 'scalar_number', 0, 0),
b'paragraph' : (1, 'snippets', 1, 0),
b'paragraph.class' : (1, 'scalar_text', 0, 0),
b'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
b'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
b'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
b'paragraph.gridSize' : (1, 'scalar_number', 0, 0),
b'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0),
b'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0),
b'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0),
b'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0),
b'word_semantic' : (1, 'snippets', 1, 1),
b'word_semantic.type' : (1, 'scalar_text', 0, 0),
b'word_semantic.class' : (1, 'scalar_text', 0, 0),
b'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
b'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
b'word_semantic.gridBottomCenter' : (1, 'scalar_number', 0, 0),
b'word_semantic.gridTopCenter' : (1, 'scalar_number', 0, 0),
b'word_semantic.gridBeginCenter' : (1, 'scalar_number', 0, 0),
b'word_semantic.gridEndCenter' : (1, 'scalar_number', 0, 0),
b'word' : (1, 'snippets', 1, 0),
b'word.type' : (1, 'scalar_text', 0, 0),
b'word.class' : (1, 'scalar_text', 0, 0),
b'word.firstGlyph' : (1, 'scalar_number', 0, 0),
b'word.lastGlyph' : (1, 'scalar_number', 0, 0),
b'_span' : (1, 'snippets', 1, 0),
b'_span.class' : (1, 'scalar_text', 0, 0),
b'_span.firstWord' : (1, 'scalar_number', 0, 0),
b'_span.lastWord' : (1, 'scalar_number', 0, 0),
b'_span.gridSize' : (1, 'scalar_number', 0, 0),
b'_span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
b'_span.gridTopCenter' : (1, 'scalar_number', 0, 0),
b'_span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
b'_span.gridEndCenter' : (1, 'scalar_number', 0, 0),
b'span' : (1, 'snippets', 1, 0),
b'span.firstWord' : (1, 'scalar_number', 0, 0),
b'span.lastWord' : (1, 'scalar_number', 0, 0),
b'span.gridSize' : (1, 'scalar_number', 0, 0),
b'span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
b'span.gridTopCenter' : (1, 'scalar_number', 0, 0),
b'span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
b'span.gridEndCenter' : (1, 'scalar_number', 0, 0),
b'extratokens' : (1, 'snippets', 1, 0),
b'extratokens.class' : (1, 'scalar_text', 0, 0),
b'extratokens.type' : (1, 'scalar_text', 0, 0),
b'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
b'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
b'extratokens.gridSize' : (1, 'scalar_number', 0, 0),
b'extratokens.gridBottomCenter' : (1, 'scalar_number', 0, 0),
b'extratokens.gridTopCenter' : (1, 'scalar_number', 0, 0),
b'extratokens.gridBeginCenter' : (1, 'scalar_number', 0, 0),
b'extratokens.gridEndCenter' : (1, 'scalar_number', 0, 0),
b'glyph.h' : (1, 'number', 0, 0),
b'glyph.w' : (1, 'number', 0, 0),
b'glyph.use' : (1, 'number', 0, 0),
b'glyph.vtx' : (1, 'number', 0, 1),
b'glyph.len' : (1, 'number', 0, 1),
b'glyph.dpi' : (1, 'number', 0, 0),
b'vtx' : (0, 'number', 1, 1),
b'vtx.x' : (1, 'number', 0, 0),
b'vtx.y' : (1, 'number', 0, 0),
b'len' : (0, 'number', 1, 1),
b'len.n' : (1, 'number', 0, 0),
b'book' : (1, 'snippets', 1, 0),
b'version' : (1, 'snippets', 1, 0),
b'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
b'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
b'version.Schema_id' : (1, 'scalar_text', 0, 0),
b'version.Schema_version' : (1, 'scalar_text', 0, 0),
b'version.Topaz_version' : (1, 'scalar_text', 0, 0),
b'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
b'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
b'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
b'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
b'version.chapterheaders' : (1, 'scalar_text', 0, 0),
b'version.creation_date' : (1, 'scalar_text', 0, 0),
b'version.header_footer' : (1, 'scalar_text', 0, 0),
b'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
b'version.letter_insertion' : (1, 'scalar_text', 0, 0),
b'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
b'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
b'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
b'version.findlists' : (1, 'scalar_text', 0, 0),
b'version.page_num' : (1, 'scalar_text', 0, 0),
b'version.page_type' : (1, 'scalar_text', 0, 0),
b'version.bad_text' : (1, 'scalar_text', 0, 0),
b'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
b'version.margins' : (1, 'scalar_text', 0, 0),
b'version.staggered_lines' : (1, 'scalar_text', 0, 0),
b'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
b'version.toc' : (1, 'scalar_text', 0, 0),
b'stylesheet' : (1, 'snippets', 1, 0),
b'style' : (1, 'snippets', 1, 0),
b'style._tag' : (1, 'scalar_text', 0, 0),
b'style.type' : (1, 'scalar_text', 0, 0),
b'style._after_type' : (1, 'scalar_text', 0, 0),
b'style._parent_type' : (1, 'scalar_text', 0, 0),
b'style._after_parent_type' : (1, 'scalar_text', 0, 0),
b'style.class' : (1, 'scalar_text', 0, 0),
b'style._after_class' : (1, 'scalar_text', 0, 0),
b'rule' : (1, 'snippets', 1, 0),
b'rule.attr' : (1, 'scalar_text', 0, 0),
b'rule.value' : (1, 'scalar_text', 0, 0),
b'original' : (0, 'number', 1, 1),
b'original.pnum' : (1, 'number', 0, 0),
b'original.pid' : (1, 'text', 0, 0),
b'pages' : (0, 'number', 1, 1),
b'pages.ref' : (1, 'number', 0, 0),
b'pages.id' : (1, 'number', 0, 0),
b'startID' : (0, 'number', 1, 1),
b'startID.page' : (1, 'number', 0, 0),
b'startID.id' : (1, 'number', 0, 0),
b'median_d' : (1, 'number', 0, 0),
b'median_h' : (1, 'number', 0, 0),
b'median_firsty' : (1, 'number', 0, 0),
b'median_lasty' : (1, 'number', 0, 0),
b'num_footers_maybe' : (1, 'number', 0, 0),
b'num_footers_yes' : (1, 'number', 0, 0),
b'num_headers_maybe' : (1, 'number', 0, 0),
b'num_headers_yes' : (1, 'number', 0, 0),
b'tracking' : (1, 'number', 0, 0),
b'src' : (1, 'text', 0, 0),
}
@@ -430,7 +430,7 @@ class PageParser(object):
cnt = len(self.tagpath)
if i < cnt : result = self.tagpath[i]
for j in range(i+1, cnt) :
result += '.' + self.tagpath[j]
result += b'.' + self.tagpath[j]
return result
@@ -505,7 +505,7 @@ class PageParser(object):
if (subtags == 1):
ntags = readEncodedNumber(self.fo)
if self.debug : print('subtags: ' + token + ' has ' + str(ntags))
if self.debug : print('subtags: ', token , ' has ' , str(ntags))
for j in range(ntags):
val = readEncodedNumber(self.fo)
subtagres.append(self.procToken(self.dict.lookup(val)))
@@ -613,7 +613,7 @@ class PageParser(object):
subtagList = tag[1]
argtype = tag[2]
argList = tag[3]
nname = prefix + '.' + name
nname = prefix + b'.' + name
nsubtaglist = []
for j in subtagList:
nsubtaglist.append(self.updateName(j,prefix))
@@ -662,34 +662,34 @@ class PageParser(object):
subtagList = node[1]
argtype = node[2]
argList = node[3]
fullpathname = name.split('.')
fullpathname = name.split(b'.')
nodename = fullpathname.pop()
ilvl = len(fullpathname)
indent = ' ' * (3 * ilvl)
indent = b' ' * (3 * ilvl)
rlst = []
rlst.append(indent + '<' + nodename + '>')
rlst.append(indent + b'<' + nodename + b'>')
if len(argList) > 0:
alst = []
for j in argList:
if (argtype == 'text') or (argtype == 'scalar_text') :
alst.append(j + '|')
if (argtype == b'text') or (argtype == b'scalar_text') :
alst.append(j + b'|')
else :
alst.append(str(j) + ',')
argres = "".join(alst)
alst.append(str(j).encode('utf-8') + b',')
argres = b"".join(alst)
argres = argres[0:-1]
if argtype == 'snippets' :
rlst.append('snippets:' + argres)
if argtype == b'snippets' :
rlst.append(b'snippets:' + argres)
else :
rlst.append(argres)
if len(subtagList) > 0 :
rlst.append('\n')
rlst.append(b'\n')
for j in subtagList:
if len(j) > 0 :
rlst.append(self.formatTag(j))
rlst.append(indent + '</' + nodename + '>\n')
rlst.append(indent + b'</' + nodename + b'>\n')
else:
rlst.append('</' + nodename + '>\n')
return "".join(rlst)
rlst.append(b'</' + nodename + b'>\n')
return b"".join(rlst)
# flatten tag
@@ -704,20 +704,20 @@ class PageParser(object):
alst = []
for j in argList:
if (argtype == 'text') or (argtype == 'scalar_text') :
alst.append(j + '|')
alst.append(j + b'|')
else :
alst.append(str(j) + '|')
argres = "".join(alst)
alst.append(str(j).encode('utf-8') + b'|')
argres = b"".join(alst)
argres = argres[0:-1]
if argtype == 'snippets' :
rlst.append('.snippets=' + argres)
if argtype == b'snippets' :
rlst.append(b'.snippets=' + argres)
else :
rlst.append('=' + argres)
rlst.append('\n')
rlst.append(b'=' + argres)
rlst.append(b'\n')
for j in subtagList:
if len(j) > 0 :
rlst.append(self.flattenTag(j))
return "".join(rlst)
return b"".join(rlst)
# reduce create xml output
@@ -729,7 +729,7 @@ class PageParser(object):
rlst.append(self.flattenTag(j))
else:
rlst.append(self.formatTag(j))
result = "".join(rlst)
result = b"".join(rlst)
if self.debug : print(result)
return result
@@ -747,16 +747,16 @@ class PageParser(object):
# peek at the first bytes to see what type of file it is
magic = self.fo.read(9)
if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
first_token = 'info'
elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
if (magic[0:1] == b'p') and (magic[2:9] == b'marker_'):
first_token = b'info'
elif (magic[0:1] == b'p') and (magic[2:9] == b'__PAGE_'):
skip = self.fo.read(2)
first_token = 'info'
elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
first_token = 'info'
elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
first_token = b'info'
elif (magic[0:1] == b'p') and (magic[2:8] == b'_PAGE_'):
first_token = b'info'
elif (magic[0:1] == b'g') and (magic[2:9] == b'__GLYPH'):
skip = self.fo.read(3)
first_token = 'info'
first_token = b'info'
else :
# other0.dat file
first_token = None
@@ -778,7 +778,7 @@ class PageParser(object):
break
if (v == 0x72):
self.doLoop72('number')
self.doLoop72(b'number')
elif (v > 0) and (v < self.dict.getSize()) :
tag = self.procToken(self.dict.lookup(v))
if len(tag) > 0 :
@@ -789,7 +789,7 @@ class PageParser(object):
if (v == 0):
if (self.peek(1) == 0x5f):
skip = self.fo.read(1)
first_token = 'info'
first_token = b'info'
# now do snippet injection
if len(self.snippetList) > 0 :
@@ -809,14 +809,14 @@ class PageParser(object):
def fromData(dict, fname):
flat_xml = True
debug = False
debug = True
pp = PageParser(fname, dict, debug, flat_xml)
xmlpage = pp.process()
return xmlpage
def getXML(dict, fname):
flat_xml = False
debug = False
debug = True
pp = PageParser(fname, dict, debug, flat_xml)
xmlpage = pp.process()
return xmlpage
@@ -845,7 +845,7 @@ def main(argv):
sys.stderr=SafeUnbuffered(sys.stderr)
dictFile = ""
pageFile = ""
debug = False
debug = True
flat_xml = False
printOutput = False
if len(argv) == 0:

@@ -7,6 +7,7 @@ import csv
import os
import math
import getopt
import functools
from struct import pack
from struct import unpack
@@ -15,14 +16,14 @@ class DocParser(object):
def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
self.id = os.path.basename(fileid).replace('.dat','')
self.svgcount = 0
self.docList = flatxml.split('\n')
self.docList = flatxml.split(b'\n')
self.docSize = len(self.docList)
self.classList = {}
self.bookDir = bookDir
self.gdict = gdict
tmpList = classlst.split('\n')
for pclass in tmpList:
if pclass != '':
if pclass != b'':
# remove the leading period from the css name
cname = pclass[1:]
self.classList[cname] = True
@@ -57,9 +58,9 @@ class DocParser(object):
imgfile = os.path.join(imgDir,imgname)
# get glyph information
gxList = self.getData('info.glyph.x',0,-1)
gyList = self.getData('info.glyph.y',0,-1)
gidList = self.getData('info.glyph.glyphID',0,-1)
gxList = self.getData(b'info.glyph.x',0,-1)
gyList = self.getData(b'info.glyph.y',0,-1)
gidList = self.getData(b'info.glyph.glyphID',0,-1)
gids = []
maxws = []
@@ -122,11 +123,11 @@ class DocParser(object):
def lineinDoc(self, pos) :
if (pos >= 0) and (pos < self.docSize) :
item = self.docList[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
if item.find(b'=') >= 0:
(name, argres) = item.split(b'=',1)
else :
name = item
argres = ''
argres = b''
return name, argres
@@ -140,11 +141,13 @@ class DocParser(object):
foundat = -1
for j in range(pos, end):
item = self.docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
if item.find(b'=') >= 0:
(name, argres) = item.split(b'=',1)
else :
name = item
argres = ''
if (isinstance(tagpath,str)):
tagpath = tagpath.encode('utf-8')
if name.endswith(tagpath) :
result = argres
foundat = j
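
findinDoc now compares against a bytes document, so the new isinstance guard transparently encodes str tagpaths from callers that have not yet been migrated. The coercion in isolation:

def normalize_tagpath(tagpath):
    # mirror of the guard added above: the flattened document lines are
    # bytes, so a str tagpath must be encoded before the endswith() check
    if isinstance(tagpath, str):
        tagpath = tagpath.encode('utf-8')
    return tagpath

print(b'page.region.type'.endswith(normalize_tagpath('region.type')))   # True
print(b'page.region.type'.endswith(normalize_tagpath(b'region.type')))  # True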
@@ -170,7 +173,7 @@ class DocParser(object):
argres=[]
(foundat, argt) = self.findinDoc(tagpath, pos, end)
if (argt != None) and (len(argt) > 0) :
argList = argt.split('|')
argList = argt.split(b'|')
argres = [ int(strval) for strval in argList]
return argres
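
Splitting on b'|' leaves bytes chunks, but the int() comprehension works unchanged, since int() accepts ASCII-digit bytes in Python 3. For example:

argt = b'12|34|56'                                     # illustrative packed value list
print([int(strval) for strval in argt.split(b'|')])    # [12, 34, 56]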
@@ -191,21 +194,21 @@ class DocParser(object):
# also some class names have spaces in them so need to convert to dashes
if nclass != None :
nclass = nclass.replace(' ','-')
classres = ''
nclass = nclass.replace(b' ',b'-')
classres = b''
nclass = nclass.lower()
nclass = 'cl-' + nclass
baseclass = ''
nclass = b'cl-' + nclass
baseclass = b''
# graphic is the base class for captions
if nclass.find('cl-cap-') >=0 :
classres = 'graphic' + ' '
if nclass.find(b'cl-cap-') >=0 :
classres = b'graphic' + b' '
else :
# strip to find baseclass
p = nclass.find('_')
p = nclass.find(b'_')
if p > 0 :
baseclass = nclass[0:p]
if baseclass in self.classList:
classres += baseclass + ' '
classres += baseclass + b' '
classres += nclass
nclass = classres
return nclass
@@ -225,11 +228,11 @@ class DocParser(object):
return -1
result = []
(pos, pagetype) = self.findinDoc('page.type',0,-1)
(pos, pagetype) = self.findinDoc(b'page.type',0,-1)
groupList = self.posinDoc('page.group')
groupregionList = self.posinDoc('page.group.region')
pageregionList = self.posinDoc('page.region')
groupList = self.posinDoc(b'page.group')
groupregionList = self.posinDoc(b'page.group.region')
pageregionList = self.posinDoc(b'page.region')
# integrate into one list
for j in groupList:
result.append(('grpbeg',j))
@ -237,7 +240,7 @@ class DocParser(object):
result.append(('gregion',j))
for j in pageregionList:
result.append(('pregion',j))
result.sort(compare)
result.sort(key=functools.cmp_to_key(compare))
# insert group end and page end indicators
inGroup = False
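
Python 3 removed the cmp argument to list.sort(), so the old two-argument compare function is wrapped with functools.cmp_to_key (hence the new import at the top of the file). A self-contained sketch with a hypothetical stand-in for this module's compare():

import functools

def compare(a, b):
    # hypothetical stand-in: order (kind, position) entries by position
    return (a[1] > b[1]) - (a[1] < b[1])

result = [('pregion', 40), ('grpbeg', 10), ('gregion', 25)]
result.sort(key=functools.cmp_to_key(compare))
print(result)   # [('grpbeg', 10), ('gregion', 25), ('pregion', 40)]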
@@ -267,33 +270,33 @@ class DocParser(object):
result = []
# paragraph
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
(pos, pclass) = self.findinDoc(b'paragraph.class',start,end)
pclass = self.getClass(pclass)
# if paragraph uses extratokens (extra glyphs) then make it fixed
(pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
(pos, extraglyphs) = self.findinDoc(b'paragraph.extratokens',start,end)
# build up a description of the paragraph in result and return it
# first check for the basic - all words paragraph
(pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
(pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
(pos, sfirst) = self.findinDoc(b'paragraph.firstWord',start,end)
(pos, slast) = self.findinDoc(b'paragraph.lastWord',start,end)
if (sfirst != None) and (slast != None) :
first = int(sfirst)
last = int(slast)
makeImage = (regtype == 'vertical') or (regtype == 'table')
makeImage = (regtype == b'vertical') or (regtype == b'table')
makeImage = makeImage or (extraglyphs != None)
if self.fixedimage:
makeImage = makeImage or (regtype == 'fixed')
makeImage = makeImage or (regtype == b'fixed')
if (pclass != None):
makeImage = makeImage or (pclass.find('.inverted') >= 0)
makeImage = makeImage or (pclass.find(b'.inverted') >= 0)
if self.fixedimage :
makeImage = makeImage or (pclass.find('cl-f-') >= 0)
makeImage = makeImage or (pclass.find(b'cl-f-') >= 0)
# before creating an image make sure glyph info exists
gidList = self.getData('info.glyph.glyphID',0,-1)
gidList = self.getData(b'info.glyph.glyphID',0,-1)
makeImage = makeImage & (len(gidList) > 0)
@@ -307,8 +310,8 @@ class DocParser(object):
# translate first and last word into first and last glyphs
# and generate inline image and include it
glyphList = []
firstglyphList = self.getData('word.firstGlyph',0,-1)
gidList = self.getData('info.glyph.glyphID',0,-1)
firstglyphList = self.getData(b'word.firstGlyph',0,-1)
gidList = self.getData(b'info.glyph.glyphID',0,-1)
firstGlyph = firstglyphList[first]
if last < len(firstglyphList):
lastGlyph = firstglyphList[last]
@@ -326,8 +329,8 @@ class DocParser(object):
for glyphnum in range(firstGlyph, lastGlyph):
glyphList.append(glyphnum)
# include any extratokens if they exist
(pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
(pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
(pos, sfg) = self.findinDoc(b'extratokens.firstGlyph',start,end)
(pos, slg) = self.findinDoc(b'extratokens.lastGlyph',start,end)
if (sfg != None) and (slg != None):
for glyphnum in range(int(sfg), int(slg)):
glyphList.append(glyphnum)
@@ -368,39 +371,39 @@ class DocParser(object):
(name, argres) = self.lineinDoc(line)
if name.endswith('span.firstWord') :
if name.endswith(b'span.firstWord') :
sp_first = int(argres)
elif name.endswith('span.lastWord') :
elif name.endswith(b'span.lastWord') :
sp_last = int(argres)
elif name.endswith('word.firstGlyph') :
elif name.endswith(b'word.firstGlyph') :
gl_first = int(argres)
elif name.endswith('word.lastGlyph') :
elif name.endswith(b'word.lastGlyph') :
gl_last = int(argres)
elif name.endswith('word_semantic.firstWord'):
elif name.endswith(b'word_semantic.firstWord'):
ws_first = int(argres)
elif name.endswith('word_semantic.lastWord'):
elif name.endswith(b'word_semantic.lastWord'):
ws_last = int(argres)
elif name.endswith('word.class'):
elif name.endswith(b'word.class'):
# we only handle spaceafter word class
try:
(cname, space) = argres.split('-',1)
if space == '' : space = '0'
if (cname == 'spaceafter') and (int(space) > 0) :
(cname, space) = argres.split(b'-',1)
if space == b'' : space = b'0'
if (cname == b'spaceafter') and (int(space) > 0) :
word_class = 'sa'
except:
pass
elif name.endswith('word.img.src'):
elif name.endswith(b'word.img.src'):
result.append(('img' + word_class, int(argres)))
word_class = ''
elif name.endswith('region.img.src'):
elif name.endswith(b'region.img.src'):
result.append(('img' + word_class, int(argres)))
if (sp_first != -1) and (sp_last != -1):
@@ -437,7 +440,7 @@ class DocParser(object):
classres = ''
if pclass :
classres = ' class="' + pclass + '"'
classres = ' class="' + pclass.decode('utf-8') + '"'
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
@@ -470,8 +473,8 @@ class DocParser(object):
if (link > 0):
linktype = self.link_type[link-1]
title = self.link_title[link-1]
if (title == "") or (parares.rfind(title) < 0):
title=parares[lstart:]
if (title == b"") or (parares.rfind(title.decode('utf-8')) < 0):
title=parares[lstart:].encode('utf-8')
if linktype == 'external' :
linkhref = self.link_href[link-1]
linkhtml = '<a href="%s">' % linkhref
@@ -482,33 +485,34 @@ class DocParser(object):
else :
# just link to the current page
linkhtml = '<a href="#' + self.id + '">'
linkhtml += title + '</a>'
pos = parares.rfind(title)
linkhtml += title.decode('utf-8')
linkhtml += '</a>'
pos = parares.rfind(title.decode('utf-8'))
if pos >= 0:
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
else :
parares += linkhtml
lstart = len(parares)
if word == '_link_' : word = ''
if word == b'_link_' : word = b''
elif (link < 0) :
if word == '_link_' : word = ''
if word == b'_link_' : word = b''
if word == '_lb_':
if word == b'_lb_':
if ((num-1) in self.dehyphen_rootid ) or handle_links:
word = ''
word = b''
sep = ''
elif br_lb :
word = '<br />\n'
word = b'<br />\n'
sep = ''
else :
word = '\n'
word = b'\n'
sep = ''
if num in self.dehyphen_rootid :
word = word[0:-1]
sep = ''
parares += word + sep
parares += word.decode('utf-8') + sep
elif wtype == 'img' :
sep = ''
@@ -522,7 +526,9 @@ class DocParser(object):
elif wtype == 'svg' :
sep = ''
parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
parares += '<img src="img/'
parares += self.id
parares += '_%04d.svg" alt="" />' % num
parares += sep
if len(sep) > 0 : parares = parares[0:-1]
@@ -545,7 +551,7 @@ class DocParser(object):
(wtype, num) = pdesc[j]
if wtype == 'ocr' :
word = self.ocrtext[num]
word = self.ocrtext[num].decode('utf-8')
sep = ' '
if handle_links:
@@ -553,7 +559,7 @@ class DocParser(object):
if (link > 0):
linktype = self.link_type[link-1]
title = self.link_title[link-1]
title = title.rstrip('. ')
title = title.rstrip(b'. ')
alt_title = parares[lstart:]
alt_title = alt_title.strip()
# now strip off the actual printed page number
@@ -607,38 +613,38 @@ class DocParser(object):
hlst = []
# get the ocr text
(pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
if argres : self.ocrtext = argres.split('|')
(pos, argres) = self.findinDoc(b'info.word.ocrText',0,-1)
if argres : self.ocrtext = argres.split(b'|')
# get information to dehyphenate the text
self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
self.dehyphen_rootid = self.getData(b'info.dehyphen.rootID',0,-1)
# determine if first paragraph is continued from previous page
(pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
(pos, self.parastems_stemid) = self.findinDoc(b'info.paraStems.stemID',0,-1)
first_para_continued = (self.parastems_stemid != None)
# determine if last paragraph is continued onto the next page
(pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
(pos, self.paracont_stemid) = self.findinDoc(b'info.paraCont.stemID',0,-1)
last_para_continued = (self.paracont_stemid != None)
# collect link ids
self.link_id = self.getData('info.word.link_id',0,-1)
self.link_id = self.getData(b'info.word.link_id',0,-1)
# collect link destination page numbers
self.link_page = self.getData('info.links.page',0,-1)
self.link_page = self.getData(b'info.links.page',0,-1)
# collect link types (container versus external)
(pos, argres) = self.findinDoc('info.links.type',0,-1)
if argres : self.link_type = argres.split('|')
(pos, argres) = self.findinDoc(b'info.links.type',0,-1)
if argres : self.link_type = argres.split(b'|')
# collect link destinations
(pos, argres) = self.findinDoc('info.links.href',0,-1)
if argres : self.link_href = argres.split('|')
(pos, argres) = self.findinDoc(b'info.links.href',0,-1)
if argres : self.link_href = argres.split(b'|')
# collect link titles
(pos, argres) = self.findinDoc('info.links.title',0,-1)
(pos, argres) = self.findinDoc(b'info.links.title',0,-1)
if argres :
self.link_title = argres.split('|')
self.link_title = argres.split(b'|')
else:
self.link_title.append('')
@@ -662,51 +668,51 @@ class DocParser(object):
# set anchor for link target on this page
if not anchorSet and not first_para_continued:
hlst.append('<div style="visibility: hidden; height: 0; width: 0;" id="')
hlst.append(self.id + '" title="pagetype_' + pagetype + '"></div>\n')
hlst.append(self.id + '" title="pagetype_' + pagetype.decode('utf-8') + '"></div>\n')
anchorSet = True
# handle groups of graphics with text captions
if (etype == 'grpbeg'):
(pos, grptype) = self.findinDoc('group.type', start, end)
if (etype == b'grpbeg'):
(pos, grptype) = self.findinDoc(b'group.type', start, end)
if grptype != None:
if grptype == 'graphic':
gcstr = ' class="' + grptype + '"'
if grptype == b'graphic':
gcstr = ' class="' + grptype.decode('utf-8') + '"'
hlst.append('<div' + gcstr + '>')
inGroup = True
elif (etype == 'grpend'):
elif (etype == b'grpend'):
if inGroup:
hlst.append('</div>\n')
inGroup = False
else:
(pos, regtype) = self.findinDoc('region.type',start,end)
(pos, regtype) = self.findinDoc(b'region.type',start,end)
if regtype == 'graphic' :
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if regtype == b'graphic' :
(pos, simgsrc) = self.findinDoc(b'img.src',start,end)
if simgsrc:
if inGroup:
hlst.append('<img src="img/img%04d.jpg" alt="" />' % int(simgsrc))
else:
hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
elif regtype == 'chapterheading' :
elif regtype == b'chapterheading' :
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not breakSet:
hlst.append('<div style="page-break-after: always;">&nbsp;</div>\n')
breakSet = True
tag = 'h1'
if pclass and (len(pclass) >= 7):
if pclass[3:7] == 'ch1-' : tag = 'h1'
if pclass[3:7] == 'ch2-' : tag = 'h2'
if pclass[3:7] == 'ch3-' : tag = 'h3'
hlst.append('<' + tag + ' class="' + pclass + '">')
if pclass[3:7] == b'ch1-' : tag = 'h1'
if pclass[3:7] == b'ch2-' : tag = 'h2'
if pclass[3:7] == b'ch3-' : tag = 'h3'
hlst.append('<' + tag + ' class="' + pclass.decode('utf-8') + '">')
else:
hlst.append('<' + tag + '>')
hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
hlst.append('</' + tag + '>')
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
elif (regtype == b'text') or (regtype == b'fixed') or (regtype == b'insert') or (regtype == b'listitem'):
ptype = 'full'
# check to see if this is a continuation from the previous page
if first_para_continued :
@@ -715,16 +721,16 @@ class DocParser(object):
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if pclass and (len(pclass) >= 6) and (ptype == 'full'):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
if pclass[3:6] == 'h3-' : tag = 'h6'
hlst.append('<' + tag + ' class="' + pclass + '">')
if pclass[3:6] == b'h1-' : tag = 'h4'
if pclass[3:6] == b'h2-' : tag = 'h5'
if pclass[3:6] == b'h3-' : tag = 'h6'
hlst.append('<' + tag + ' class="' + pclass.decode('utf-8') + '">')
hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
hlst.append('</' + tag + '>')
else :
hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
elif (regtype == 'tocentry') :
elif (regtype == b'tocentry') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
@@ -733,7 +739,7 @@ class DocParser(object):
tocinfo += self.buildTOCEntry(pdesc)
hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
elif (regtype == 'vertical') or (regtype == 'table') :
elif (regtype == b'vertical') or (regtype == b'table') :
ptype = 'full'
if inGroup:
ptype = 'middle'
@@ -744,19 +750,19 @@ class DocParser(object):
hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
elif (regtype == 'synth_fcvr.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
elif (regtype == b'synth_fcvr.center'):
(pos, simgsrc) = self.findinDoc(b'img.src',start,end)
if simgsrc:
hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
else :
print(' Making region type', regtype, end=' ')
(pos, temp) = self.findinDoc('paragraph',start,end)
(pos2, temp) = self.findinDoc('span',start,end)
(pos, temp) = self.findinDoc(b'paragraph',start,end)
(pos2, temp) = self.findinDoc(b'span',start,end)
if pos != -1 or pos2 != -1:
print(' a "text" region')
orig_regtype = regtype
regtype = 'fixed'
regtype = b'fixed'
ptype = 'full'
# check to see if this is a continuation from the previous page
if first_para_continued :
@@ -764,23 +770,23 @@ class DocParser(object):
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not pclass:
if orig_regtype.endswith('.right') : pclass = 'cl-right'
elif orig_regtype.endswith('.center') : pclass = 'cl-center'
elif orig_regtype.endswith('.left') : pclass = 'cl-left'
elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
if orig_regtype.endswith(b'.right') : pclass = 'cl-right'
elif orig_regtype.endswith(b'.center') : pclass = 'cl-center'
elif orig_regtype.endswith(b'.left') : pclass = 'cl-left'
elif orig_regtype.endswith(b'.justify') : pclass = 'cl-justify'
if pclass and (ptype == 'full') and (len(pclass) >= 6):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
if pclass[3:6] == 'h3-' : tag = 'h6'
hlst.append('<' + tag + ' class="' + pclass + '">')
if pclass[3:6] == b'h1-' : tag = 'h4'
if pclass[3:6] == b'h2-' : tag = 'h5'
if pclass[3:6] == b'h3-' : tag = 'h6'
hlst.append('<' + tag + ' class="' + pclass.decode('utf-8') + '">')
hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
hlst.append('</' + tag + '>')
else :
hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
else :
print(' a "graphic" region')
(pos, simgsrc) = self.findinDoc('img.src',start,end)
(pos, simgsrc) = self.findinDoc(b'img.src',start,end)
if simgsrc:
hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))

@@ -12,7 +12,7 @@ from struct import unpack
class PParser(object):
def __init__(self, gd, flatxml, meta_array):
self.gd = gd
self.flatdoc = flatxml.split('\n')
self.flatdoc = flatxml.split(b'\n')
self.docSize = len(self.flatdoc)
self.temp = []
@@ -58,11 +58,11 @@ class PParser(object):
def lineinDoc(self, pos) :
if (pos >= 0) and (pos < self.docSize) :
item = self.flatdoc[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
if item.find(b'=') >= 0:
(name, argres) = item.split(b'=',1)
else :
name = item
argres = ''
argres = b''
return name, argres
# find tag in doc if within pos to end inclusive
@@ -75,11 +75,13 @@ class PParser(object):
foundat = -1
for j in range(pos, end):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
if item.find(b'=') >= 0:
(name, argres) = item.split(b'=',1)
else :
name = item
argres = ''
argres = b''
if (isinstance(tagpath,str)):
tagpath = tagpath.encode('utf-8')
if name.endswith(tagpath) :
result = argres
foundat = j
@@ -103,9 +105,9 @@ class PParser(object):
cnt = len(self.flatdoc)
for j in range(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
if item.find(b'=') >= 0:
(name, argt) = item.split(b'=')
argres = argt.split(b'|')
else:
name = item
argres = []
@@ -120,15 +122,17 @@ class PParser(object):
def getDataatPos(self, path, pos):
result = None
item = self.flatdoc[pos]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
if item.find(b'=') >= 0:
(name, argt) = item.split(b'=')
argres = argt.split(b'|')
else:
name = item
argres = []
if (len(argres) > 0) :
for j in range(0,len(argres)):
argres[j] = int(argres[j])
if (isinstance(path,str)):
path = path.encode('utf-8')
if (name.endswith(path)):
result = argres
return result
@@ -138,12 +142,14 @@ class PParser(object):
cnt = len(self.temp)
for j in range(cnt):
item = self.temp[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
if item.find(b'=') >= 0:
(name, argt) = item.split(b'=')
argres = argt.split(b'|')
else:
name = item
argres = []
if (isinstance(path,str)):
path = path.encode('utf-8')
if (name.endswith(path)):
result = argres
self.temp.pop(j)

@@ -44,10 +44,10 @@ if inCalibre :
from calibre_plugins.dedrm import flatxml2svg
from calibre_plugins.dedrm import stylexml2css
else :
from . import convert2xml
from . import flatxml2html
from . import flatxml2svg
from . import stylexml2css
import convert2xml
import flatxml2html
import flatxml2svg
import stylexml2css
# global switch
buildXML = False
@@ -117,10 +117,10 @@ class Dictionary(object):
self.stable.append(self.escapestr(readString(self.fo)))
self.pos = 0
def escapestr(self, str):
str = str.replace('&','&amp;')
str = str.replace('<','&lt;')
str = str.replace('>','&gt;')
str = str.replace('=','&#61;')
str = str.replace(b'&',b'&amp;')
str = str.replace(b'<',b'&lt;')
str = str.replace(b'>',b'&gt;')
str = str.replace(b'=',b'&#61;')
return str
def lookup(self,val):
if ((val >= 0) and (val < self.size)) :
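
The dictionary strings are now read as bytes, so the XML escaping switches to bytes literals; '&' is still replaced first so the entities it inserts are not themselves re-escaped. Standalone:

def escapestr(s):
    # bytes-level XML escaping, same replacement order as the patched method
    s = s.replace(b'&', b'&amp;')
    s = s.replace(b'<', b'&lt;')
    s = s.replace(b'>', b'&gt;')
    s = s.replace(b'=', b'&#61;')
    return s

print(escapestr(b'a<b & c=d'))   # b'a&lt;b &amp; c&#61;d'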
@@ -138,7 +138,7 @@ class Dictionary(object):
class PageDimParser(object):
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.flatdoc = flatxml.split(b'\n')
# find tag if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
@@ -151,8 +151,8 @@ class PageDimParser(object):
foundat = -1
for j in range(pos, end):
item = docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=')
if item.find(b'=') >= 0:
(name, argres) = item.split(b'=')
else :
name = item
argres = ''
@@ -162,8 +162,8 @@ class PageDimParser(object):
break
return foundat, result
def process(self):
(pos, sph) = self.findinDoc('page.h',0,-1)
(pos, spw) = self.findinDoc('page.w',0,-1)
(pos, sph) = self.findinDoc(b'page.h',0,-1)
(pos, spw) = self.findinDoc(b'page.w',0,-1)
if (sph == None): sph = '-1'
if (spw == None): spw = '-1'
return sph, spw
@@ -176,21 +176,21 @@ def getPageDim(flatxml):
class GParser(object):
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.flatdoc = flatxml.split(b'\n')
self.dpi = 1440
self.gh = self.getData('info.glyph.h')
self.gw = self.getData('info.glyph.w')
self.guse = self.getData('info.glyph.use')
self.gh = self.getData(b'info.glyph.h')
self.gw = self.getData(b'info.glyph.w')
self.guse = self.getData(b'info.glyph.use')
if self.guse :
self.count = len(self.guse)
else :
self.count = 0
self.gvtx = self.getData('info.glyph.vtx')
self.glen = self.getData('info.glyph.len')
self.gdpi = self.getData('info.glyph.dpi')
self.vx = self.getData('info.vtx.x')
self.vy = self.getData('info.vtx.y')
self.vlen = self.getData('info.len.n')
self.gvtx = self.getData(b'info.glyph.vtx')
self.glen = self.getData(b'info.glyph.len')
self.gdpi = self.getData(b'info.glyph.dpi')
self.vx = self.getData(b'info.vtx.x')
self.vy = self.getData(b'info.vtx.y')
self.vlen = self.getData(b'info.len.n')
if self.vlen :
self.glen.append(len(self.vlen))
elif self.glen:
@@ -204,9 +204,9 @@ class GParser(object):
cnt = len(self.flatdoc)
for j in range(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
if item.find(b'=') >= 0:
(name, argt) = item.split(b'=')
argres = argt.split(b'|')
else:
name = item
argres = []
@@ -431,7 +431,7 @@ def generateBook(bookDir, raw, fixedimage):
# now get the css info
cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
open(xname, 'wb').write(cssstr)
open(xname, 'w').write(cssstr)
if buildXML:
xname = os.path.join(xmlDir, 'other0000.xml')
open(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
@@ -525,7 +525,7 @@ def generateBook(bookDir, raw, fixedimage):
hlst.append('</body>\n</html>\n')
htmlstr = "".join(hlst)
hlst = None
open(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
open(os.path.join(bookDir, htmlFileName), 'w').write(htmlstr)
print(" ")
print('Extracting Table of Contents from Amazon OCR')
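
With the HTML now assembled from str pieces, the output file switches from binary to text mode: Python 3's file objects reject str in 'wb' mode and bytes in 'w' mode. The rule in miniature (paths here are illustrative):

htmlstr = "".join(['<html>', '</html>'])          # str pieces -> str result
open('/tmp/example.xhtml', 'w').write(htmlstr)    # text mode for str

svgdata = b"".join([b'<svg>', b'</svg>'])         # bytes pieces -> bytes result
open('/tmp/example.svg', 'wb').write(svgdata)     # binary mode for bytes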
@@ -571,7 +571,7 @@ def generateBook(bookDir, raw, fixedimage):
tlst.append('</body>\n')
tlst.append('</html>\n')
tochtml = "".join(tlst)
open(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml)
open(os.path.join(svgDir, 'toc.xhtml'), 'w').write(tochtml)
# now create index_svg.xhtml that points to all required files
@@ -608,7 +608,7 @@ def generateBook(bookDir, raw, fixedimage):
flst = []
for page in pagelst:
flst.append(xmllst[page])
flat_svg = "".join(flst)
flat_svg = b"".join(flst)
flst=None
svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
if (raw) :
@@ -626,7 +626,7 @@ def generateBook(bookDir, raw, fixedimage):
slst.append('</body>\n</html>\n')
svgindex = "".join(slst)
slst = None
open(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
open(os.path.join(bookDir, 'index_svg.xhtml'), 'w').write(svgindex)
print(" ")
@@ -637,16 +637,16 @@ def generateBook(bookDir, raw, fixedimage):
olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
# adding metadata
olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
if 'GUID' in meta_array:
olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
if 'ASIN' in meta_array:
olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
if 'oASIN' in meta_array:
olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
if b'GUID' in meta_array:
olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array[b'GUID'].decode('utf-8') + '</dc:identifier>\n')
if b'ASIN' in meta_array:
olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array[b'ASIN'].decode('utf-8') + '</dc:identifier>\n')
if b'oASIN' in meta_array:
olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array[b'oASIN'].decode('utf-8') + '</dc:identifier>\n')
olst.append(' <dc:title>' + meta_array[b'Title'].decode('utf-8') + '</dc:title>\n')
olst.append(' <dc:creator opf:role="aut">' + meta_array[b'Authors'].decode('utf-8') + '</dc:creator>\n')
olst.append(' <dc:language>en</dc:language>\n')
olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
olst.append(' <dc:date>' + meta_array[b'UpdateTime'].decode('utf-8') + '</dc:date>\n')
if isCover:
olst.append(' <meta name="cover" content="bookcover"/>\n')
olst.append(' </metadata>\n')
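
meta_array now carries bytes keys and bytes values, so the OPF builder looks up b'...' keys and decodes each value before concatenating with str. The shape of the change, with made-up metadata:

meta_array = {b'Title': b'Example Title', b'Authors': b'A. N. Author'}   # illustrative values
olst = []
if b'Title' in meta_array:
    # decode the bytes value so it can join the surrounding str literals
    olst.append(' <dc:title>' + meta_array[b'Title'].decode('utf-8') + '</dc:title>\n')
print("".join(olst))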
@@ -675,7 +675,7 @@ def generateBook(bookDir, raw, fixedimage):
olst.append('</package>\n')
opfstr = "".join(olst)
olst = None
open(opfname, 'wb').write(opfstr)
open(opfname, 'w').write(opfstr)
print('Processing Complete')

@@ -49,14 +49,15 @@ def SHA1(message):
# Encode the bytes in data with the characters in map
# data and map should be byte arrays
def encode(data, map):
result = ''
result = b''
for char in data:
value = ord(char)
value = char
Q = (value ^ 0x80) // len(map)
R = value % len(map)
result += map[Q]
result += map[R]
result += bytes([map[Q]])
result += bytes([map[R]])
return result
# Hash the bytes in data and then encode the digest with the characters in map
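
Two Python 3 changes meet in encode(): iterating over bytes yields ints (so ord() goes away), and a single value is turned back into a one-byte bytes object with bytes([...]). Runnable with a made-up 16-character map:

def encode(data, map):
    # data and map are bytes; each input byte becomes two mapped output bytes
    result = b''
    for char in data:
        value = char                     # already an int in Python 3
        Q = (value ^ 0x80) // len(map)
        R = value % len(map)
        result += bytes([map[Q]])
        result += bytes([map[R]])
    return result

print(encode(b'\x00\x01', b'0123456789ABCDEF'))   # b'8081'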
@@ -117,7 +118,7 @@ def generatePidEncryptionTable() :
def generatePidSeed(table,dsn) :
value = 0
for counter in range (0,4) :
index = (ord(dsn[counter]) ^ value) &0xFF
index = (dsn[counter] ^ value) & 0xFF
value = (value >> 8) ^ table[index]
return value
@@ -129,7 +130,7 @@ def generateDevicePID(table,dsn,nbRoll):
pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
index = 0
for counter in range (0,nbRoll):
pid[index] = pid[index] ^ ord(dsn[counter])
pid[index] = pid[index] ^ dsn[counter]
index = (index+1) %8
for counter in range (0,8):
index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
@@ -205,7 +206,7 @@ def getK4Pids(rec209, token, kindleDatabase):
try:
# Get the kindle account token, if present
kindleAccountToken = bytearray.fromhex((kindleDatabase[1])[b'kindle.account.tokens']).decode()
kindleAccountToken = bytearray.fromhex((kindleDatabase[1])['kindle.account.tokens'])
except KeyError:
kindleAccountToken=""
@@ -213,30 +214,30 @@ def getK4Pids(rec209, token, kindleDatabase):
try:
# Get the DSN token, if present
DSN = bytearray.fromhex((kindleDatabase[1])['DSN']).decode()
DSN = bytearray.fromhex((kindleDatabase[1])['DSN'])
print("Got DSN key from database {0}".format(kindleDatabase[0]))
except KeyError:
# See if we have the info to generate the DSN
try:
# Get the Mazama Random number
MazamaRandomNumber = bytearray.fromhex((kindleDatabase[1])[b'MazamaRandomNumber']).decode()
MazamaRandomNumber = bytearray.fromhex((kindleDatabase[1])['MazamaRandomNumber'])
#print "Got MazamaRandomNumber from database {0}".format(kindleDatabase[0])
try:
# Get the SerialNumber token, if present
IDString = bytearray.fromhex((kindleDatabase[1])[b'SerialNumber']).decode()
IDString = bytearray.fromhex((kindleDatabase[1])['SerialNumber'])
print("Got SerialNumber from database {0}".format(kindleDatabase[0]))
except KeyError:
# Get the IDString we added
IDString = bytearray.fromhex((kindleDatabase[1])[b'IDString']).decode()
IDString = bytearray.fromhex((kindleDatabase[1])['IDString'])
try:
# Get the UsernameHash token, if present
encodedUsername = bytearray.fromhex((kindleDatabase[1])[b'UsernameHash']).decode()
encodedUsername = bytearray.fromhex((kindleDatabase[1])['UsernameHash'])
print("Got UsernameHash from database {0}".format(kindleDatabase[0]))
except KeyError:
# Get the UserName we added
UserName = bytearray.fromhex((kindleDatabase[1])[b'UserName']).decode()
UserName = bytearray.fromhex((kindleDatabase[1])['UserName'])
# encode it
encodedUsername = encodeHash(UserName,charMap1)
#print "encodedUsername",encodedUsername.encode('hex')
@@ -266,19 +267,19 @@ def getK4Pids(rec209, token, kindleDatabase):
# Compute book PIDs
# book pid
pidHash = SHA1(DSN.encode()+kindleAccountToken.encode()+rec209+token)
pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pids.append(bookPID)
# variant 1
pidHash = SHA1(kindleAccountToken.encode()+rec209+token)
pidHash = SHA1(kindleAccountToken+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pids.append(bookPID)
# variant 2
pidHash = SHA1(DSN.encode()+rec209+token)
pidHash = SHA1(DSN+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pids.append(bookPID)

@@ -7,7 +7,7 @@
from __future__ import print_function
__license__ = 'GPL v3'
__version__ = "1.00"
__version__ = "1.0"
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
@@ -73,7 +73,7 @@ __version__ = "1.00"
# 0.40 - moved unicode_argv call inside main for Windows DeDRM compatibility
# 0.41 - Fixed potential unicode problem in command line calls
# 0.42 - Added GPL v3 licence. updated/removed some print statements
# 1.00 - Python 3 compatibility for calibre 5.0
# 1.0 - Python 3 compatibility for calibre 5.0
import sys
import os
@@ -330,7 +330,7 @@ class MobiBook:
}
title = ''
codec = 'windows-1252'
if self.magic == 'BOOKMOBI':
if self.magic == b'BOOKMOBI':
if 503 in self.meta_array:
title = self.meta_array[503]
else:
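
self.magic is read from the PDB header in binary mode, and in Python 3 a bytes value never compares equal to a str, so the old check was silently always False; the literal becomes b'BOOKMOBI'. Illustration:

magic = b'BOOKMOBI'            # as read from a file opened in 'rb' mode
print(magic == 'BOOKMOBI')     # False in Python 3: bytes != str, and no error is raised
print(magic == b'BOOKMOBI')    # True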

@@ -15,36 +15,36 @@ debug = False
class DocParser(object):
def __init__(self, flatxml, fontsize, ph, pw):
self.flatdoc = flatxml.split('\n')
self.flatdoc = flatxml.split(b'\n')
self.fontsize = int(fontsize)
self.ph = int(ph) * 1.0
self.pw = int(pw) * 1.0
stags = {
'paragraph' : 'p',
'graphic' : '.graphic'
b'paragraph' : 'p',
b'graphic' : '.graphic'
}
attr_val_map = {
'hang' : 'text-indent: ',
'indent' : 'text-indent: ',
'line-space' : 'line-height: ',
'margin-bottom' : 'margin-bottom: ',
'margin-left' : 'margin-left: ',
'margin-right' : 'margin-right: ',
'margin-top' : 'margin-top: ',
'space-after' : 'padding-bottom: ',
b'hang' : 'text-indent: ',
b'indent' : 'text-indent: ',
b'line-space' : 'line-height: ',
b'margin-bottom' : 'margin-bottom: ',
b'margin-left' : 'margin-left: ',
b'margin-right' : 'margin-right: ',
b'margin-top' : 'margin-top: ',
b'space-after' : 'padding-bottom: ',
}
attr_str_map = {
'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
'align-left' : 'text-align: left;',
'align-right' : 'text-align: right;',
'align-justify' : 'text-align: justify;',
'display-inline' : 'display: inline;',
'pos-left' : 'text-align: left;',
'pos-right' : 'text-align: right;',
'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
b'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
b'align-left' : 'text-align: left;',
b'align-right' : 'text-align: right;',
b'align-justify' : 'text-align: justify;',
b'display-inline' : 'display: inline;',
b'pos-left' : 'text-align: left;',
b'pos-right' : 'text-align: right;',
b'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
}
@@ -60,11 +60,13 @@ class DocParser(object):
foundat = -1
for j in range(pos, end):
item = docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
if item.find(b'=') >= 0:
(name, argres) = item.split(b'=',1)
else :
name = item
argres = ''
argres = b''
if (isinstance(tagpath,str)):
tagpath = tagpath.encode('utf-8')
if name.endswith(tagpath) :
result = argres
foundat = j
@@ -76,7 +78,7 @@ class DocParser(object):
def posinDoc(self, tagpath):
startpos = []
pos = 0
res = ""
res = b""
while res != None :
(foundpos, res) = self.findinDoc(tagpath, pos, -1)
if res != None :
@@ -87,11 +89,11 @@ class DocParser(object):
# returns a vector of integers for the tagpath
def getData(self, tagpath, pos, end, clean=False):
if clean:
digits_only = re.compile(r'''([0-9]+)''')
digits_only = re.compile(rb'''([0-9]+)''')
argres=[]
(foundat, argt) = self.findinDoc(tagpath, pos, end)
if (argt != None) and (len(argt) > 0) :
argList = argt.split('|')
argList = argt.split(b'|')
for strval in argList:
if clean:
m = re.search(digits_only, strval)
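
re refuses to mix str patterns with bytes subjects, so the digits-only pattern becomes a bytes regex (rb'...') to match the bytes values produced by split(b'|'). For example:

import re

digits_only = re.compile(rb'''([0-9]+)''')
m = re.search(digits_only, b'12.5%')
print(m.group(1))    # b'12'
# re.search(r'([0-9]+)', b'12.5%') would raise TypeError (str pattern on bytes data)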
@@ -109,7 +111,7 @@ class DocParser(object):
csspage += '.cl-justify { text-align: justify; }\n'
# generate a list of each <style> starting point in the stylesheet
styleList= self.posinDoc('book.stylesheet.style')
styleList= self.posinDoc(b'book.stylesheet.style')
stylecnt = len(styleList)
styleList.append(-1)
@@ -121,30 +123,30 @@ class DocParser(object):
start = styleList[j]
end = styleList[j+1]
(pos, tag) = self.findinDoc('style._tag',start,end)
(pos, tag) = self.findinDoc(b'style._tag',start,end)
if tag == None :
(pos, tag) = self.findinDoc('style.type',start,end)
(pos, tag) = self.findinDoc(b'style.type',start,end)
# Is this something we know how to convert to css
if tag in self.stags :
# get the style class
(pos, sclass) = self.findinDoc('style.class',start,end)
(pos, sclass) = self.findinDoc(b'style.class',start,end)
if sclass != None:
sclass = sclass.replace(' ','-')
sclass = '.cl-' + sclass.lower()
sclass = sclass.replace(b' ',b'-')
sclass = b'.cl-' + sclass.lower()
else :
sclass = ''
sclass = b''
if debug: print('sclass', sclass)
# check for any "after class" specifiers
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
(pos, aftclass) = self.findinDoc(b'style._after_class',start,end)
if aftclass != None:
aftclass = aftclass.replace(' ','-')
aftclass = '.cl-' + aftclass.lower()
aftclass = aftclass.replace(b' ',b'-')
aftclass = b'.cl-' + aftclass.lower()
else :
aftclass = ''
aftclass = b''
if debug: print('aftclass', aftclass)
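The class-name cleanup now passes bytes arguments to replace(), since bytes methods reject str arguments under Python 3. A compressed sketch of the transformation (sample class name illustrative):

# replace() and lower() both operate directly on bytes.
sclass = b'Body Text'
sclass = b'.cl-' + sclass.replace(b' ', b'-').lower()
print(sclass)  # b'.cl-body-text'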
@ -152,34 +154,37 @@ class DocParser(object):
while True :
(pos1, attr) = self.findinDoc('style.rule.attr', start, end)
(pos2, val) = self.findinDoc('style.rule.value', start, end)
(pos1, attr) = self.findinDoc(b'style.rule.attr', start, end)
(pos2, val) = self.findinDoc(b'style.rule.value', start, end)
if debug: print('attr', attr)
if debug: print('val', val)
if attr == None : break
if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
if (attr == b'display') or (attr == b'pos') or (attr == b'align'):
# handle text based attributes
attr = attr + '-' + val
attr = attr + b'-' + val
if attr in self.attr_str_map :
cssargs[attr] = (self.attr_str_map[attr], '')
cssargs[attr] = (self.attr_str_map[attr], b'')
else :
# handle value based attributes
if attr in self.attr_val_map :
name = self.attr_val_map[attr]
if attr in ('margin-bottom', 'margin-top', 'space-after') :
if attr in (b'margin-bottom', b'margin-top', b'space-after') :
scale = self.ph
elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
elif attr in (b'margin-right', b'indent', b'margin-left', b'hang') :
scale = self.pw
elif attr == 'line-space':
elif attr == b'line-space':
scale = self.fontsize * 2.0
else:
print("Scale not defined!")
scale = 1.0
if val == "":
val = 0
if not ((attr == 'hang') and (int(val) == 0)):
if not ((attr == b'hang') and (int(val) == 0)):
try:
f = float(val)
except:
@ -198,32 +203,32 @@ class DocParser(object):
if debug: print('keeping style')
# make sure line-space does not go below 100% or above 300% since
# it can be wacky in some styles
if 'line-space' in cssargs:
seg = cssargs['line-space'][0]
val = cssargs['line-space'][1]
if b'line-space' in cssargs:
seg = cssargs[b'line-space'][0]
val = cssargs[b'line-space'][1]
if val < 1.0: val = 1.0
if val > 3.0: val = 3.0
del cssargs['line-space']
cssargs['line-space'] = (self.attr_val_map['line-space'], val)
del cssargs[b'line-space']
cssargs[b'line-space'] = (self.attr_val_map[b'line-space'], val)
# handle modifications for css style hanging indents
if 'hang' in cssargs:
hseg = cssargs['hang'][0]
hval = cssargs['hang'][1]
del cssargs['hang']
cssargs['hang'] = (self.attr_val_map['hang'], -hval)
if b'hang' in cssargs:
hseg = cssargs[b'hang'][0]
hval = cssargs[b'hang'][1]
del cssargs[b'hang']
cssargs[b'hang'] = (self.attr_val_map[b'hang'], -hval)
mval = 0
mseg = 'margin-left: '
mval = hval
if 'margin-left' in cssargs:
mseg = cssargs['margin-left'][0]
mval = cssargs['margin-left'][1]
if b'margin-left' in cssargs:
mseg = cssargs[b'margin-left'][0]
mval = cssargs[b'margin-left'][1]
if mval < 0: mval = 0
mval = hval + mval
cssargs['margin-left'] = (mseg, mval)
if 'indent' in cssargs:
del cssargs['indent']
cssargs[b'margin-left'] = (mseg, mval)
if b'indent' in cssargs:
del cssargs[b'indent']
cssline = sclass + ' { '
for key in iter(cssargs):

@ -173,7 +173,7 @@ def decryptRecord(data,PID):
def decryptDkeyRecord(data,PID):
record = decryptRecord(data,PID)
fields = unpack('3sB8sB8s3s',record)
if fields[0] != 'PID' or fields[5] != 'pid' :
if fields[0] != b'PID' or fields[5] != b'pid' :
raise DrmException("Didn't find PID magic numbers in record")
elif fields[1] != 8 or fields[3] != 8 :
raise DrmException("Record didn't contain correct length fields")
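The 's' format code in struct.unpack() returns bytes on Python 3, so the PID magic values must be compared as bytes. A minimal sketch using the record layout above (field contents illustrative):

from struct import unpack

# '3sB8sB8s3s' consumes 24 bytes; the 's' fields come back as bytes.
record = b'PID\x08AAAAAAAA\x08BBBBBBBBpid'
fields = unpack('3sB8sB8s3s', record)
print(fields[0], fields[5])  # b'PID' b'pid'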
@ -183,11 +183,11 @@ def decryptDkeyRecord(data,PID):
# Decrypt all dkey records (contain the book PID)
def decryptDkeyRecords(data,PID):
nbKeyRecords = ord(data[0])
nbKeyRecords = data[0]
records = []
data = data[1:]
for i in range (0,nbKeyRecords):
length = ord(data[0])
length = data[0]
try:
key = decryptDkeyRecord(data[1:length+1],PID)
records.append(key)
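Dropping the ord() calls is required, not just cosmetic: indexing bytes on Python 3 already yields an int, and ord() raises a TypeError when handed one. A tiny sketch (record contents illustrative):

# data[0] is an int under Python 3; ord(data[0]) would raise TypeError.
data = b'\x02rest-of-record'
nbKeyRecords = data[0]
print(nbKeyRecords)  # 2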
@ -209,7 +209,7 @@ class TopazBook:
self.bookMetadata = {}
self.bookKey = None
magic = unpack('4s',self.fo.read(4))[0]
if magic != 'TPZ0':
if magic != b'TPZ0':
raise DrmException("Parse Error : Invalid Header, not a Topaz file")
self.parseTopazHeaders()
self.parseMetadata()
@ -244,9 +244,9 @@ class TopazBook:
def parseMetadata(self):
# Parse the metadata record from the book payload and return a list of [key,values]
self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords['metadata'][0][0])
self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords[b'metadata'][0][0])
tag = bookReadString(self.fo)
if tag != 'metadata' :
if tag != b'metadata' :
raise DrmException("Parse Error : Record Names Don't Match")
flags = ord(self.fo.read(1))
nbRecords = ord(self.fo.read(1))
@ -260,18 +260,18 @@ class TopazBook:
return self.bookMetadata
def getPIDMetaInfo(self):
keysRecord = self.bookMetadata.get('keys','')
keysRecordRecord = ''
if keysRecord != '':
keylst = keysRecord.split(',')
keysRecord = self.bookMetadata.get(b'keys',b'')
keysRecordRecord = b''
if keysRecord != b'':
keylst = keysRecord.split(b',')
for keyval in keylst:
keysRecordRecord += self.bookMetadata.get(keyval,'')
keysRecordRecord += self.bookMetadata.get(keyval,b'')
return keysRecord, keysRecordRecord
def getBookTitle(self):
title = ''
if 'Title' in self.bookMetadata:
title = self.bookMetadata['Title']
title = b''
if b'Title' in self.bookMetadata:
title = self.bookMetadata[b'Title']
return title.decode('utf-8')
def setBookKey(self, key):
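The metadata dictionary is keyed and valued as bytes end to end; the title is decoded only at the boundary where a str is actually wanted. A minimal sketch (metadata contents illustrative):

# Look up with bytes keys, decode once on the way out.
bookMetadata = {b'Title': b'Example Book'}
title = bookMetadata.get(b'Title', b'')
print(title.decode('utf-8'))  # Example Book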
@ -323,7 +323,7 @@ class TopazBook:
raw = 0
fixedimage=True
try:
keydata = self.getBookPayloadRecord('dkey', 0)
keydata = self.getBookPayloadRecord(b'dkey', 0)
except DrmException as e:
print("no dkey record found, book may not be encrypted")
print("attempting to extrct files without a book key")
@ -354,7 +354,7 @@ class TopazBook:
pass
else:
bookKey = bookKeys[0]
print("Book Key Found! ({0})".format(bookKey.encode('hex')))
print("Book Key Found! ({0})".format(bookKey.hex()))
break
if not bookKey:
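str.encode('hex') relied on a Python 2 codec that no longer exists; bytes.hex() is the Python 3 equivalent. A one-line sketch (key value illustrative):

# bytes.hex() renders a bytes key as a lowercase hex string.
bookKey = b'\x01\x02\xab'
print(bookKey.hex())  # 0102ab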
@ -396,26 +396,26 @@ class TopazBook:
outdir = self.outdir
for headerRecord in self.bookHeaderRecords:
name = headerRecord
if name != 'dkey':
if name != b'dkey':
ext = ".dat"
if name == 'img': ext = ".jpg"
if name == 'color' : ext = ".jpg"
print("Processing Section: {0}\n. . .".format(name), end=' ')
if name == b'img': ext = ".jpg"
if name == b'color' : ext = ".jpg"
print("Processing Section: {0}\n. . .".format(name.decode('utf-8')), end=' ')
for index in range (0,len(self.bookHeaderRecords[name])) :
fname = "{0}{1:04d}{2}".format(name,index,ext)
fname = "{0}{1:04d}{2}".format(name.decode('utf-8'),index,ext)
destdir = outdir
if name == 'img':
if name == b'img':
destdir = os.path.join(outdir,"img")
if name == 'color':
if name == b'color':
destdir = os.path.join(outdir,"color_img")
if name == 'page':
if name == b'page':
destdir = os.path.join(outdir,"page")
if name == 'glyphs':
if name == b'glyphs':
destdir = os.path.join(outdir,"glyphs")
outputFile = os.path.join(destdir,fname)
print(".", end=' ')
record = self.getBookPayloadRecord(name,index)
if record != '':
if record != b'':
open(outputFile, 'wb').write(record)
print(" ")
