# This file is fontenc2mtp.py
#
# (c) 2004. Javier Bezos. License: LPPL.
#
# This file creates mtp files for several
# font encodings.
#
# Very, very quick and dirty, because currently
# I'm still not sure how to carry out the Unicode
# to font transformation. Due to an otp2ocp error,
# {botaccent}<0,> is removed and therefore complex
# composite characters are not recomposed properly.



import unicodedata

class Unichar:
    """A Unicode code point with its name and canonical decomposition.

    Attributes:
      code -- the code point as an integer
      char -- the corresponding one-character unicode string
      name -- the Unicode character name, or None if the code point
              has no name in the unicodedata database
      dec  -- list of integer code points: the canonical decomposition
              applied recursively until no part decomposes further.
              Empty when the character has no canonical decomposition
              (compatibility decompositions, tagged with '<...>', are
              ignored).
    """

    def __init__(self, char):
        """Build from a one-character unicode string or an integer."""
        if isinstance(char, type(u'')):
            self.code = ord(char)
        else:
            self.code = char
        #end

        self.char = unichr(self.code)
        try:
            self.name = unicodedata.name(self.char)
        except ValueError:
            # unicodedata.name() raises ValueError for unnamed code
            # points; keep the attribute defined so readers do not hit
            # an AttributeError later.
            self.name = None
        #end

        self.dec = self.decompose(self.code)
    #end

    def decompose(self, code):
        """Return the full canonical decomposition of code as a list.

        The list is empty when code has no canonical decomposition.
        Parts that decompose further are expanded recursively, so
        chains of any depth (e.g. U+1FA7) are fully resolved.
        """
        mapping = unicodedata.decomposition(unichr(code))
        if '<' in mapping:
            # Compatibility mapping: not a canonical decomposition.
            return []
        #end
        result = []
        for part in [int(x, 16) for x in mapping.split()]:
            expanded = self.decompose(part)
            if expanded:
                result += expanded
            else:
                result.append(part)
            #end
        #end
        return result
    #end

    def decList(self):
        """Return the character names of the decomposition parts."""
        return [unicodedata.name(unichr(x)) for x in self.dec]
    #end

    def decString(self):
        """Return the decomposition as '[NAME][NAME]...'."""
        return '[%s]' % ']['.join(self.decList())
    #end

    def recList(self):
        """Return the names to recompose from.

        A one-element (singleton) decomposition yields an empty list.
        """
        if len(self.dec) == 1: return []
        return [unicodedata.name(unichr(x)) for x in self.dec]
    #end

    def recString(self):
        """Return recList() as '[NAME][NAME]...', or '' when it is empty."""
        # Use recList(), not decList(), so singletons are left out here
        # exactly as they are in recList().
        tempa = self.recList()
        if not tempa: return ''
        return '[%s]' % ']['.join(tempa)
    #end

    # TODO: isAbove, isBottom,
#end

def printchr(u, botaccent=0):
    """Return '[NAME]' for the code point given as the hex string u.

    When botaccent is true and the character has canonical combining
    class 230, the result is prefixed with '{botaccent}<0,>'.
    """
    char = unichr(int(u, 16))
    prefix = ''
    if botaccent and unicodedata.combining(char) == 230:
        prefix = '{botaccent}<0,>'
    #end
    return prefix + '[%s]' % unicodedata.name(char)
#end

def decnames(i):
    """Return the one-level canonical decomposition of code point i
    as concatenated '[NAME]' items, or '' when there is none.

    Compatibility decompositions (those carrying a '<tag>') count as
    no decomposition.
    """
    raw = unicodedata.decomposition(unichr(i))
    if '<' in raw:
        return ''
    #end
    pieces = []
    for code in raw.split():
        pieces.append(printchr(code))
    #end
    return ''.join(pieces)
#end

# Accent-name tables consulted by makemtp() when it writes the
# \UseMemAccent rules.

# underaccents
# COMBINING x -> COMBINING x BELOW
# For an above accent named [COMBINING x] with x in this list, makemtp()
# also emits a rule for [COMBINING x BELOW] using the same font slot
# (accent kind "u").
ua = ['GRAVE ACCENT',
      'ACUTE ACCENT',
      'MACRON',
      'DIAERESIS',
      'CARON',
      'CIRCUMFLEX ACCENT',
      'BREVE',
      'TILDE']

# COMBINING x ABOVE -> COMBINING x BELOW
# Same as above, for accents whose above form is named with "ABOVE".

uax = ['DOT',
       'RING']

# undersigns
# COMBINING x BELOW => \UseMemAccent{p}{<value>}
# The value is the punctuation character to be placed below the base.
ug = {'COMMA' : ',',
      'DOT'   : '.'}

# special
# Accents with combining class 202: [COMBINING x] is emitted with the
# accent kind given as the value.
us = {'CEDILLA' : 'c',
      'OGONEK'  : 'k'}
      

# Alias block shared by the -com and -rec files.
_ACCENT_ALIASES = (
    'topaccent  = (@"0300-@"0315 | @"031A-@"031B | @"033D-@"0344);\n'
    'botaccent  = (@"0316-@"0319 | @"031C-@"0333 | @"0339-@"033C | @"0345);\n'
    'overaccent = (@"0334-@"0338);\n'
    'accent     = (@"0300-@"0345);\n\n')


def _load_mmap(path, symfont, encmap, sym):
    """Read the map file at path into encmap (unicode -> slot) and sym.

    Each usable line has at least three whitespace-separated fields;
    the first is the font slot and the second the Unicode value. Lines
    that cannot be parsed are skipped. sym[unicode] is set to symfont.
    """
    fh = open(path)
    try:
        for line in fh.readlines():
            try:
                enc, uni, dummy = line.split(None, 2)
                # NOTE(review): eval() runs whatever the map file
                # contains. Only use trusted .mmap files; the accepted
                # number syntax is not visible here, so it is kept.
                slot = eval(enc)
                code = eval(uni)
                encmap[code] = slot
                sym[code] = symfont
            except Exception:
                # Best effort: comment, blank and malformed lines are
                # ignored, but KeyboardInterrupt/SystemExit still pass.
                pass
            #end
        #end
    finally:
        fh.close()
    #end
#end


def _pad40(text):
    """Pad text to 40 columns; longer text gets one trailing space."""
    if len(text) > 40:
        return text + ' '
    #end
    return text.ljust(40)
#end


def _fmt_run(first, last):
    """Format a run of code points as an mtp value or value range."""
    if first == last:
        return '@"%04X' % first
    #end
    return '@"%04X-@"%04X' % (first, last)
#end


def _write_text(path, chunks):
    """Write the concatenated chunks to path, always closing the file."""
    fo = open(path, 'w')
    try:
        fo.write(''.join(chunks))
    finally:
        fo.close()
    #end
#end


def makemtp(infile, outfile, scriptranges, gx = ''):
    """Generate the three mtp files for one font encoding.

    Writes <outfile>-com.mtp, <outfile>-rec.mtp and <outfile>-dec.mtp
    in the current directory.

    infile       -- base name of the encoding map (<infile>.mmap is
                    read after ts1.mmap, whose entries are tagged as
                    TS1 symbols and may be overridden). If None, no
                    file is read and every code point in the ranges
                    maps to itself.
    outfile      -- base name of the generated files.
    scriptranges -- sequence of [first, last] code point pairs, both
                    ends inclusive. Three ranges (A1-BF, 300-36F,
                    2000-206F) are always added. The argument itself
                    is not modified.
    gx           -- extra expressions copied verbatim near the top of
                    the -com file.
    """
    sym = {}
    encmap = {}

    recout = []
    mapout = []
    symout = []
    decout = []
    cmbout = []
    sameout = []

    # Copy instead of using += so the caller's list is left untouched.
    scriptranges = list(scriptranges) + \
                   [[0xA1, 0xBF], [0x300, 0x36F], [0x2000, 0x206F]]

    if infile is None:
        for b, e in scriptranges:
            # Ranges are inclusive (see the membership test below), so
            # the last code point is e itself.
            for i in range(b, e + 1):
                encmap[i] = i
                sym[i] = ''
            #end
        #end
    else:
        _load_mmap('ts1.mmap', 'ts1', encmap, sym)
        _load_mmap('%s.mmap' % infile, '', encmap, sym)
    #end

    # Current run of characters whose slot equals their Unicode value.
    run_start = run_end = 0x20

    for i in range(0, 65535):
        in_range = False
        for lo, hi in scriptranges:
            if lo <= i <= hi:
                in_range = True
                break
            #end
        #end
        if not (in_range or i in encmap):
            continue
        #end

        ch = unichr(i)
        try:
            n = '[%s]' % unicodedata.name(ch)
        except ValueError:
            # Unnamed code point: refer to it by number.
            n = '@"%04X' % i
        #end

        # One-level canonical decomposition, as '[NAME]' items.
        canon = unicodedata.decomposition(ch)
        if '<' in canon: canon = ''
        d = ''.join([printchr(x) for x in canon.split()])
        # Composite means at least two items in the decomposition.
        iscomp = bool(d) and '[' in d[1:]
        if not d: d = n
        tmp = _pad40(d)
        suffix = ''
        if '<' in d: suffix = '\\(*+1-1)'
        recline = '%s => <= %s %s ;\n' % (tmp, n, suffix)
        cc = unicodedata.combining(ch)

        if i in encmap and encmap[i] == i and not sym[i] and not cc:
            # Same value in font and Unicode: extend or start a run.
            if run_end == i - 1:
                run_end = i
            else:
                sameout.append(_fmt_run(run_start, run_end))
                run_start = run_end = i
            #end
            if iscomp:
                recout.append(recline)
            #end
        elif i in encmap:
            slot = encmap[i]
            if sym[i]:
                symout.append('%s => "\\UseMemTextSymbol{%s}{%d}";\n'
                              % (n.ljust(30), sym[i].upper(), slot))
            elif cc == 230:
                cmbout.append('<acc>%s => "\\UseMemAccent{t}{%d}";\n'
                              % (n, slot))
                for uae in ua:
                    if n == '[COMBINING %s]' % uae:
                        cmbout.append(
                            '<acc>%s => "\\UseMemAccent{u}{%d}";\n'
                            % ('[COMBINING %s BELOW]' % uae, slot))
                    #end
                #end
                for uaxe in uax:
                    if n == '[COMBINING %s ABOVE]' % uaxe:
                        cmbout.append(
                            '<acc>%s => "\\UseMemAccent{u}{%d}";\n'
                            % ('[COMBINING %s BELOW]' % uaxe, slot))
                    #end
                #end
            elif cc == 220:
                cmbout.append('<acc>%s => "\\UseMemAccent{b}{%d}";\n'
                              % (n, slot))
            elif cc == 202:
                for use, ust in us.items():
                    if n == '[COMBINING %s]' % use:
                        cmbout.append(
                            '<acc>%s => "\\UseMemAccent{%s}{%d}";\n'
                            % (n, ust, slot))
                    #end
                #end
            elif iscomp:
                recout.append(recline)
                if i != slot:
                    mapout.append('%s => @"%02X ;\n' % (n, slot))
                #end
            else:
                # Characters below 20 are very often active, ignored or
                # invalid. However, as far as an ocp font is concerned
                # they are valid. We move them to the second page of
                # the PUA and they will be restored in the _last_ step
                # with \char.
                if slot < 0x20:
                    mapout.append('%s => @"%04X ;\n' % (tmp, slot+0xe100))
                else:
                    mapout.append('%s => @"%04X ;\n' % (tmp, slot))
                #end
            #end
        #end

        if canon:
            decout.append('%s\n        => <= %s;\n'
                          % (n, Unichar(i).decString()))
        #end
    #end

    # Flush the last pending run.
    sameout.append(_fmt_run(run_start, run_end))
    same_alt = '|'.join(sameout)

    for uge, ugt in ug.items():
        cmbout.append('<acc>%s => "\\UseMemAccent{p}{%s}";\n'
                      % ('[COMBINING %s BELOW]' % uge, ugt))
    #end

    # ---- <outfile>-com.mtp ----

    com = ['% (c) 2001-2004 Javier Bezos\n\n'
           'input: 2;\n'
           'output: 1;\n\n'
           'states: acc, end;\n\n'
           'aliases:\n\n',
           _ACCENT_ALIASES,
           'expressions:\n\n']
    if gx: com.append(gx)
    com.append('\n\n% Combine if composed form not in main font\n\n')
    com.append('^({accent}) {topaccent} {topaccent} {accent}    => <=\n'
               '     \\4 "c{" \\3 "c{" \\2 "l{" \\1 "}}}" @"1B  <push: acc>;\n'
               '^({accent}) {topaccent} {botaccent} {topaccent} => <=\n'
               '     \\3 "c{" \\4 "c{" \\2 "l{" \\1 "}}}" @"1B  <push: acc>;\n'
               '^({accent}) {botaccent} {topaccent} {topaccent} => <=\n'
               '     \\2 "c{" \\4 "c{" \\3 "l{" \\1 "}}}" @"1B  <push: acc>;\n'
               '^({accent}) {topaccent} {accent}                => <=\n'
               '     \\3 "c{" \\2 "l{" \\1 "}}" @"1B  <push: acc>;\n'
               '^({accent}) {botaccent} {topaccent}             => <=\n'
               '     \\2 "c{" \\3 "l{" \\1 "}}" @"1B  <push: acc>;\n'
               '^({accent}) {accent}                            => <=\n'
               '     \\2 "l{" \\1 "}" @"1B  <push: acc>;\n\n'
               '<acc> @"1B   => <pop:>;\n')
    com.extend(cmbout)
    com.append('\n\n% PUA -> ASCII\n\n'
               '@"E125          => "\\MemMoveOtherChar\\%"; % as a special case (37).\n'
               '(@"E100-@"E17F) => "\\MemMoveOtherChar\\" #(\\1 - @"E100);\n\n')
    com.append('\n\n% Glyphs in main font with same value as Unicode characters\n\n')
    com.append('(%s) => \\1;' % same_alt)
    _write_text('%s-com.mtp' % outfile, com)

    # ---- <outfile>-rec.mtp ----

    rec = ['% (c) 2001-2004 Javier Bezos\n\n'
           'input: 2;\n'
           'output: 2;\n\n'
           'states: acc;\n\n'
           'aliases:\n\n',
           _ACCENT_ALIASES,
           'expressions:\n\n']
    rec.append('\n\n% Recompose available symbols\n\n')
    if not recout and not cmbout: recout = ['. => \\1 ;']
    rec.extend(recout)
    rec.append('\n\n% Unicode to main font, except those below\n\n')
    # Nothing is collected for the accent section; only its newline
    # is written.
    rec.append('\n')
    rec.extend(mapout)
    rec.append('\n\n% Symbols not in main font. Use Symbol fonts\n\n')
    rec.extend(symout)
    rec.append('\n\n% Spaces and control symbols\n\n')
    rec.append('[NO-BREAK SPACE]  => "\\nobreakspace ";\n'
               '[SOFT HYPHEN]     => "\\-";\n'
               '[EN QUAD]   => "\\enskip ";\n'
               '[EM QUAD]   => "\\quad ";\n'
               '[EN SPACE]  => "\\enspace ";\n'
               '[EM SPACE]  => "\\quad ";\n'
               '[THREE-PER-EM SPACE] => "\\kern.333em ";\n'
               '[FOUR-PER-EM SPACE]  => "\\kern.25em ";\n'
               '[SIX-PER-EM SPACE]   => "\\thinspace ";\n'
               '[FIGURE SPACE]       => "\\kern\\fontcharwd\\font`0 ";\n'
               '[PUNCTUATION SPACE]  => "\\kern\\fontcharwd\\font`. ";\n'
               '[THIN SPACE]         => "\\kern.2em ";\n'
               '[HAIR SPACE]         => "\\kern1pt ";\n'
               '[ZERO WIDTH SPACE]   => "\\kern0pt plus\\fontdimen3\\font minus \\fontdimen4\\font ";\n'
               '[LINE SEPARATOR]     => "\\\\";\n'
               '[PARAGRAPH SEPARATOR] => "\\par ";\n')
    rec.append('\n\n% Accents are passed to the nest step\n\n')
    rec.append('\n\n{accent} => \\1;')
    rec.append('\n\n% Glyphs in main font with same value as Unicode characters\n'
               '% and PUA\n')
    rec.append('(%s|@"E100-@"E17F) => \\1;' % same_alt)
    rec.append('\n\n% Other characters, raise error\n\n')
    rec.append('. => "\\MemUnknownCharacter{" \\1 "}";')
    _write_text('%s-rec.mtp' % outfile, rec)

    # ---- <outfile>-dec.mtp ----

    dec = ['% (c) 2001-2004 Javier Bezos\n\n'
           'input: 2;\n'
           'output: 2;\n\n'
           'expressions:\n\n']
    dec.append('\n\n% Decompose\n\n')
    dec.extend(decout)
    _write_text('%s-dec.mtp' % outfile, dec)
#end
    
#def createmtp(enc, ranges):
#    encfile = open('%s.mmap' % enc)
#    scriptranges = ranges

# Driver: the calls below run when this file is executed (or imported)
# and write the -com/-rec/-dec .mtp files for each encoding.

# Encodings read from <name>.mmap (together with ts1.mmap).
makemtp('t1',  't1',  [[0x21, 0x2FF], [0x1E00, 0x1EFF]])
makemtp('ot1', 'ot1', [[0x21, 0x2FF], [0x1E00, 0x1EFF]])
makemtp('t2a', 't2a', [[0x400, 0x52F]])

# Extra expressions for LGR, passed as gx and copied verbatim into
# lgr-com.mtp before the generic accent-combining rules.
tmp = r"""
. [COMBINING COMMA ABOVE][COMBINING ACUTE ACCENT] => ">'" \1;
. [COMBINING COMMA ABOVE][COMBINING GRAVE ACCENT] => ">`" \1;
. [COMBINING COMMA ABOVE][COMBINING GREEK PERISPOMENI] => ">=" \1;

. [COMBINING REVERSED COMMA ABOVE][COMBINING ACUTE ACCENT] => "<'" \1;
. [COMBINING REVERSED COMMA ABOVE][COMBINING GRAVE ACCENT] => "<`" \1;
. [COMBINING REVERSED COMMA ABOVE][COMBINING GREEK PERISPOMENI] => @"40 \1;

. [COMBINING COMMA ABOVE]=> ">" \1;
. [COMBINING REVERSED COMMA ABOVE] => "<" \1;

. [COMBINING ACUTE ACCENT] => "'" \1;
. [COMBINING GRAVE ACCENT] => "`" \1;
. [COMBINING GREEK PERISPOMENI] => "=" \1;
"""

makemtp('lgr', 'lgr', [[0x370, 0x3FF], [0x1F00, 0x1FFF]], gx = tmp)

# With None as infile no .mmap file is read: code points in the given
# ranges are mapped to themselves.
makemtp(None, 'ula',  [[0x21, 0x2FF], [0x1E00, 0x1EFF]])
makemtp(None, 'ucy',  [[0x21, 0x7F],  [0x400, 0x52F]])
makemtp(None, 'uel',  [[0x21, 0x7F], [0x370, 0x3FF], [0x1F00, 0x1FFF]])