Encoding 128bit Numbers as Strings

My current project needed a way to convert between 128 bit numbers (e.g. java.util.UUID) and strings. A special requirement is that the strings be legal Java identifiers, because we use them for variable names in generated code. For the same reason, I wanted to ensure that the strings were of minimal size.

I was able to come up with a trivial algorithm in a few minutes that works OK, but I felt that I should be able to come up with the most optimal solution given a little more time. It turns out that the optimal solution consumed most of this weekend, but I finally got it working.

There’s no way I can bill my customer for this exercise, so I thought I’d post it here in case anyone else finds it interesting. I’m also curious to see if anyone can come up with a better or faster solution.

Basically my approach is to treat the 128 bit number as up to 21 groups of 6 bits each. This gives me 64 possible characters which works out perfectly, because Java has exactly 64 legal characters for identifiers (0-9, A-Z, a-z, _, and $). I also prepend an additional underscore to avoid illegal starting characters and clashes with keywords and literals.

For now, here’s the Java source to ponder. Tomorrow I’ll discuss the code in more detail, and post a VB version which I actually wrote first, then ported to Java.


001 
002 public final class Utils {
003 
004     private static final byte bUnder = 95;
005     private static final byte bDollar = 36;
006     private static final byte bQuest = 63;
007     private static final byte bFirstUAlpha = 65;
008     private static final byte bFirstLAlpha = 97;
009     private static final byte bFirstNum = 48;
010     private static final long mask6Bits = 63;
011     private static final long mask4Bits = 15;
012     private static final long mask2Bits = 3;
013     private static final int bitsPerChar = 6;
014 
015     private static final char[] charMap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$"
016                             .toCharArray();
017 
018     public final static class Int128 {
019         public final long msb;
020         public final long lsb;
021         public Int128(long m, long l) {
022             msb = m;
023             lsb = l;
024         }
025         public Int128() {
026             this(00);
027         }
028         @Override
029         public final String toString() {
030             return "" + msb + "," + lsb;
031         }
032     }
033 
034     public static String int128ToStr(long msb, long lsb) {
035         final int MAX_BYTE = 31;
036         char[] buf = new char[32];
037         int i = MAX_BYTE;
038 
039         // Process the least significant bits
040         // 64 bit number has 10 six bit encoded values plus four bits left over
041         for (int n = 0; n < 10; ++n) {
042             if (lsb == && msb == 0) {
043                 break// Eliminate leading zeros
044             }
045             byte b = (byte) (lsb & mask6Bits);
046             buf[i--= charMap[b];
047             lsb >>>= bitsPerChar;
048         }
049 
050         // 4 bits from the lsb, and 2 from the msb
051         if (lsb != || msb != || i == MAX_BYTE) {
052             long leftOver = lsb & 15;
053             long firstTwo = msb & 3;
054             byte b = (byte) ((firstTwo << 4| leftOver);
055             buf[i--= charMap[b];
056             msb >>>= 2;
057         }
058 
059         // Process the most significant bits
060         while (msb != 0) {
061             byte b = (byte) (msb & mask6Bits);
062             buf[i--= charMap[b];
063             msb >>>= bitsPerChar;
064         }
065 
066         // It's easiest to simply prefix an underscore to avoid
067         // illegal identifiers and clashes with keywords and literals.
068         buf[i--'_';
069 
070         return new String(buf, i + 1, MAX_BYTE - i);
071     }
072 
073     public static Int128 strToInt128(String s) {
074         byte[] buf = s.getBytes();
075         int maxByte = buf.length - 1;
076         int i = maxByte;
077         int minByte = 0;
078         if (buf[0== bUnder) {
079             minByte = 1;
080         }
081         long msb = 0;
082         long lsb = 0;
083 
084         for (int n = 0; n < 10; ++n) {
085             if (i < minByte) {
086                 return new Int128(msb, lsb);
087             }
088             long b = charToMod64(buf[i]);
089             if (b != 0) {
090                 if (n != 0) {
091                     b <<= (n * 6);
092                 }
093                 lsb |= b;
094             }
095             --i;
096         }
097 
098         if (i >= minByte) {
099             long b = charToMod64(buf[i]);
100             if (b != 0) {
101                 long leftOver = b & mask4Bits;
102                 msb = (b >>> 4& mask2Bits;
103                 leftOver <<= 60;
104                 lsb |= leftOver;
105             }
106             --i;
107         }
108 
109         for (int n = 0; n < 11; ++n) {
110             if (i < minByte) {
111                 return new Int128(msb, lsb);
112             }
113             long b = charToMod64(buf[i]);
114             if (b != 0) {
115                 b <<= (n * bitsPerChar + 2);
116                 msb |= b;
117             }
118             --i;
119         }
120         return new Int128(msb, lsb);
121     }
122 
123     private static byte charToMod64(byte c) {
124         if (c >= bFirstNum && c <= bFirstNum + 10) {
125             return (byte) (c - 48);
126         else if (c >= bFirstUAlpha && c <= bFirstUAlpha + 26) {
127             return (byte) (c - bFirstUAlpha + 10);
128         else {
129             if (c >= bFirstLAlpha && c <= bFirstLAlpha + 26) {
130                 return (byte) (c - bFirstLAlpha + 26 10);
131             else if (c == bUnder) {
132                 return 62;
133             else if (c == bDollar) {
134                 return 63;
135             else {
136                 assert false;
137                 return 0;
138             }
139         }
140     }
141 }

Java2html

Advertisements
This entry was posted in Uncategorized. Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s