1 /++ SJIS encoding/decoding.
2 +
3 +	Authors: Cameron "Herringway" Ross
4 +	Copyright: Cameron "Herringway" Ross
5 +	License: Boost Software License 1.0
6 +/
7 module sjisish;
8 
/// Maps an SJIS byte sequence to its Unicode code point. Keys are either a
/// single byte value or, for two-byte characters, `(lead << 8) + trail`.
/// Filled in by the module's shared static constructor.
private immutable dchar[ushort] fromSJISTable;
/// Reverse mapping: Unicode code point to SJIS byte sequence, using the same
/// packing as the keys of `fromSJISTable`. Filled in by the module's shared
/// static constructor.
private immutable ushort[dchar] toSJISTable;
11 
12 import std.traits : isSomeString;
13 import std.typecons : Flag;
14 
/++
+ Decodes a raw SJIS byte string into a unicode string.
+
+ Bytes below 0x80 are copied through unchanged. Bytes in the range 0xA1-0xDF
+ are looked up as single-byte characters. Every other byte is treated as the
+ lead byte of a two-byte sequence. Sequences without a table entry, and a lead
+ byte cut off by the end of the input, are decoded as U+FFFD.
+
+ Params:
+	T = Type of string to output.
+	input = Raw SJIS bytes to decode.
+ Returns: The decoded text as a T.
+/
auto toUTF(T = string)(const ubyte[] input) if (isSomeString!T) {
	enum dchar replacement = '\uFFFD';

	// Looks a key up in the decoding table, falling back to U+FFFD.
	static dchar decode(ushort key) {
		if (auto mapped = key in fromSJISTable) {
			return *mapped;
		}
		return replacement;
	}

	T output;
	if (!__ctfe) {
		output.reserve(input.length);
	}

	for (size_t i = 0; i < input.length; i++) {
		immutable ubyte lead = input[i];
		if (lead < 0x80) {
			// Passed through as-is, without consulting the table.
			immutable char x = lead;
			output ~= x;
		} else if ((lead >= 0xA1) && (lead < 0xE0)) {
			// Single-byte character.
			output ~= decode(lead);
		} else {
			// Lead byte of a two-byte character.
			if (i + 1 >= input.length) {
				// Truncated sequence: there is no trail byte to read.
				output ~= replacement;
				break;
			}
			immutable ushort chr = cast(ushort)((lead << 8) + input[i + 1]);
			output ~= decode(chr);
			// Consume the trail byte so it is not decoded again on its own.
			i++;
		}
	}
	return output;
}
///
@safe pure unittest {
	// Single-byte (ASCII range) input decodes to the same ASCII text.
	assert(toUTF([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]) == "Hello.");
	// Two-byte input decodes to the FULLWIDTH forms of "Hello.", written as
	// escapes so the expected text cannot be confused with plain ASCII.
	assert(toUTF([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c,  0x82, 0x8f, 0x81, 0x44]) == "\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E");
	assert(toUTF!dstring([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]) == "Hello."d);
	assert(toUTF!dstring([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c,  0x82, 0x8f, 0x81, 0x44]) == "\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"d);
	assert(toUTF!wstring([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]) == "Hello."w);
	assert(toUTF!wstring([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c,  0x82, 0x8f, 0x81, 0x44]) == "\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"w);
}
63 
/// Wrapper around a sequence of raw SJIS bytes.
struct SJISString {
	/// The underlying SJIS bytes.
	immutable(ubyte)[] raw;
	/// Lets the wrapper be used wherever the raw byte array is expected.
	alias raw this;

	/++
	+ Decodes the held bytes to unicode.
	+
	+ Params:
	+	T = Type of string to output.
	+/
	auto toUTF(T = string)() const {
		// Leading dot selects the module-level function rather than this member.
		return .toUTF!T(raw);
	}
}
75 
/++
+ Encodes a unicode string as SJIS.
+
+ Each character is looked up in the encoding table and appended as one byte
+ (table values up to 0xFF) or as two bytes, high byte first.
+
+ Note: Badly-formed unicode strings will always fail.
+ Params:
+	input = String to encode.
+	skipInvalidCharacters = Whether to skip characters that don't exist in SJIS or throw an exception.
+ Returns: An SJISString holding the encoded bytes.
+ Throws: Exception if a character has no SJIS mapping and skipInvalidCharacters is no.
+/
auto toSJIS(T)(T input, Flag!"IgnoreInvalid" skipInvalidCharacters = Flag!"IgnoreInvalid".no) if (isSomeString!T) {
	import std.exception : enforce;
	SJISString output;
	if (!__ctfe) {
		output.raw.reserve(input.length);
	}

	foreach (dchar character; input) {
		auto sjisCharPtr = character in toSJISTable;
		if (sjisCharPtr is null) {
			// Unmappable character: fail unless the caller asked to skip it.
			enforce(cast(bool)skipInvalidCharacters, "Illegal SJIS character detected in input.");
			// Never dereference the null lookup result; drop the character.
			continue;
		}
		immutable ushort sjisChar = *sjisCharPtr;
		if (sjisChar > 0xFF) {
			// Two-byte character: lead byte first, then trail byte.
			output.raw ~= cast(ubyte)((sjisChar&0xFF00)>>8);
			output.raw ~= cast(ubyte)(sjisChar&0xFF);
		} else {
			output.raw ~= cast(ubyte)sjisChar;
		}
	}

	return output;
}
///
@safe pure unittest {
	// ASCII text encodes to single bytes.
	assert(toSJIS("Hello.") == SJISString([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]));
	// The FULLWIDTH forms of "Hello." encode to two-byte sequences. They are
	// written as escapes so the input cannot be confused with plain ASCII.
	assert(toSJIS("\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E") == SJISString([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c,  0x82, 0x8f, 0x81, 0x44]));
	assert(toSJIS("Hello."d) == SJISString([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]));
	assert(toSJIS("\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"d) == SJISString([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c,  0x82, 0x8f, 0x81, 0x44]));
	assert(toSJIS("Hello."w) == SJISString([0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2e]));
	assert(toSJIS("\uFF28\uFF45\uFF4C\uFF4C\uFF4F\uFF0E"w) == SJISString([0x82, 0x67, 0x82, 0x85, 0x82, 0x8c, 0x82, 0x8c,  0x82, 0x8f, 0x81, 0x44]));
}
116 
/++
+ Initialize the character tables.
+
+ Parses the string-imported SHIFTJIS.TXT mapping file. Lines starting with
+ '#' are skipped. Every other line is expected to hold two tab-separated
+ "0x"-prefixed hexadecimal columns: the SJIS byte sequence followed by the
+ unicode code point. Blank lines and lines missing either value are skipped.
+/
shared static this() {
	import std.algorithm.iteration : splitter;
	import std.algorithm.searching : startsWith;
	import std.conv : to;
	import std.string : lineSplitter;
	auto str = import("SHIFTJIS.TXT");
	foreach (line; str.lineSplitter) {
		// Nothing to parse on blank lines or comment lines.
		if (line.length == 0 || line.startsWith("#")) {
			continue;
		}
		auto columns = line.splitter("\t");
		string sjisField = columns.front;
		columns.popFront();
		if (columns.empty) {
			// No second column on this line.
			continue;
		}
		string unicodeField = columns.front;
		// Each field needs at least one digit after its two-character prefix.
		if (sjisField.length <= 2 || unicodeField.length <= 2) {
			continue;
		}
		auto bytesequence = sjisField[2..$].to!ushort(16);
		auto codepoint = unicodeField[2..$].to!ushort(16);
		fromSJISTable[bytesequence] = codepoint;
		toSJISTable[codepoint] = bytesequence;
	}
}