/++
 + SJIS encoding/decoding.
 +
 + Authors: Cameron "Herringway" Ross
 + Copyright: Cameron "Herringway" Ross
 + License: Boost Software License 1.0
 +/
module sjisish;

// Lookup tables, keyed by SJIS byte sequence and by Unicode code point
// respectively. Both are filled in once by the module's shared static constructor.
private immutable dchar[ushort] fromSJISTable;
private immutable ushort[dchar] toSJISTable;

import std.traits : isSomeString;
import std.typecons : Flag;

/++
 + Decodes a raw SJIS byte string into a unicode string.
 +
 + Bytes below 0x80 are passed through unchanged. Bytes in 0xA1..0xDF are
 + looked up as single-byte characters. Every other byte is treated as the
 + lead byte of a two-byte sequence. Sequences that are missing from the
 + table, or that are cut off by the end of the input, decode to U+FFFD.
 +
 + Params:
 + T = Type of string to output.
 + input = Raw SJIS string to decode.
 + Returns: The decoded text as a `T`.
 +/
auto toUTF(T = string)(const ubyte[] input) if (isSomeString!T) {
	T output;
	if (!__ctfe) {
		output.reserve(input.length);
	}

	for (size_t i = 0; i < input.length; i++) {
		immutable ubyte lead = input[i];
		if (lead < 0x80) {
			// Single byte, copied through as-is.
			immutable char x = lead;
			output ~= x;
		} else if ((lead >= 0xA1) && (lead < 0xE0)) {
			// Single-byte range, resolved through the table.
			immutable ushort key = lead;
			auto mapped = key in fromSJISTable;
			output ~= (mapped !is null) ? *mapped : '\uFFFD';
		} else {
			// Two-byte sequence: 0x80..0xA0 or 0xE0..0xFF followed by a trail byte.
			if (i + 1 >= input.length) {
				// Lead byte with nothing after it: the input is truncated.
				output ~= '\uFFFD';
				break;
			}
			immutable ushort chr = cast(ushort)((lead << 8) | input[i + 1]);
			auto mapped = chr in fromSJISTable;
			output ~= (mapped !is null) ? *mapped : '\uFFFD';
			// Step over the trail byte so it is not decoded a second time.
			i++;
		}
	}
	return output;
}
///
@safe pure unittest {
	assert(toUTF([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]) == "Hello.");
	assert(toUTF([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c, 0x82, 0x8f, 0x81, 0x44]) == "\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E");
	assert(toUTF!dstring([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]) == "Hello."d);
	assert(toUTF!dstring([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c, 0x82, 0x8f, 0x81, 0x44]) == "\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"d);
	assert(toUTF!wstring([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]) == "Hello."w);
	assert(toUTF!wstring([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c, 0x82, 0x8f, 0x81, 0x44]) == "\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"w);
	// A lead byte at the very end of the input becomes a replacement character.
	assert(toUTF([0x48, 0x82]) == "H\uFFFD");
	// The trail byte of a 0xE0+ sequence is consumed, not re-decoded.
	assert(toUTF!dstring([0xE0, 0x40, 0x41]).length == 2);
	assert(toUTF!dstring([0xE0, 0x40, 0x41])[1] == 'A');
}

/// Holds an SJIS string.
struct SJISString {
	alias raw this;
	/// Raw data.
	immutable(ubyte)[] raw;

	/// Convert string to unicode.
	auto toUTF(T = string)() const {
		return raw.toUTF!T;
	}
}

/++
 + Encodes a unicode string as SJIS.
 +
 + Note: Badly-formed unicode strings will always fail.
 + Params:
 + input = String to encode.
 + skipInvalidCharacters = Whether to skip characters that don't exist in SJIS or throw an exception.
 + Returns: An `SJISString` holding the encoded bytes.
 + Throws: `Exception` if a character has no SJIS mapping and `skipInvalidCharacters` is not set.
 +/
auto toSJIS(T)(T input, Flag!"IgnoreInvalid" skipInvalidCharacters = Flag!"IgnoreInvalid".no) if (isSomeString!T) {
	import std.exception : enforce;
	SJISString output;
	if (!__ctfe) {
		output.raw.reserve(input.length);
	}

	foreach (dchar character; input) {
		auto sjisCharPtr = character in toSJISTable;
		if (sjisCharPtr is null) {
			// No mapping: either reject the input or drop this character.
			enforce(skipInvalidCharacters == Flag!"IgnoreInvalid".yes, "Illegal SJIS character detected in input.");
			continue;
		}
		immutable sjisChar = *sjisCharPtr;
		if (sjisChar > 0xFF) {
			// Two-byte sequence, high byte first.
			output.raw ~= cast(ubyte)((sjisChar&0xFF00)>>8);
			output.raw ~= cast(ubyte)(sjisChar&0xFF);
		} else {
			output.raw ~= cast(ubyte)sjisChar;
		}
	}

	return output;
}
///
@safe pure unittest {
	import std.exception : assertThrown;
	assert(toSJIS("Hello.") == SJISString([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]));
	assert(toSJIS("\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E") == SJISString([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c, 0x82, 0x8f, 0x81, 0x44]));
	assert(toSJIS("Hello."d) == SJISString([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]));
	assert(toSJIS("\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"d) == SJISString([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c, 0x82, 0x8f, 0x81, 0x44]));
	assert(toSJIS("Hello."w) == SJISString([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]));
	assert(toSJIS("\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"w) == SJISString([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c, 0x82, 0x8f, 0x81, 0x44]));
	// Characters without an SJIS mapping are dropped when asked, rejected otherwise.
	assert(toSJIS("H\u00E9", Flag!"IgnoreInvalid".yes) == SJISString([0x48]));
	assertThrown(toSJIS("H\u00E9"));
}

/// Initialize character table.
shared static this() {
	// Fills fromSJISTable and toSJISTable from the SHIFTJIS.TXT string import.
	// Each data line is read as two tab-separated "0x"-prefixed hexadecimal
	// columns: the SJIS byte sequence, then the unicode code point.
	import std.algorithm.iteration : splitter;
	import std.algorithm.searching : startsWith;
	import std.conv : to;
	import std.string : lineSplitter;
	auto str = import("SHIFTJIS.TXT");
	foreach (line; str.lineSplitter) {
		// Skip comment lines, and empty lines that have no columns to read.
		if (line.length == 0 || line.startsWith("#")) {
			continue;
		}
		auto columns = line.splitter("\t");
		// [2..$] drops the leading "0x" before parsing as base 16.
		immutable sjisBytes = columns.front[2..$].to!ushort(16);
		columns.popFront();
		immutable codepoint = columns.front[2..$].to!ushort(16);
		fromSJISTable[sjisBytes] = codepoint;
		toSJISTable[codepoint] = sjisBytes;
	}
}