{ +-------------------------------------------------------------+ }
{ |                                                             | }
{ |   GM-Software                                               | }
{ |   ===========                                               | }
{ |                                                             | }
{ |   Project: All Projects                                     | }
{ |                                                             | }
{ |   Description: Character encodings. Thread safe.            | }
{ |                Conforms to RFC 3548/4648                    | }
{ |                                                             | }
{ |   Copyright (C) - 2015 - Gerrit Moeller.                    | }
{ |                                                             | }
{ |   Source code distributed under MIT license.                | }
{ |                                                             | }
{ |   See: https://www.gm-software.de                           | }
{ |                                                             | }
{ +-------------------------------------------------------------+ }

{$INCLUDE GMCompilerSettings.inc}

unit GMCharCoding;

interface

uses GMStrDef, GMCommon;


function GMEncodeBase16(const ADataPtr: PByte; const ADataSizeInBytes: PtrUInt): TGMString;
function GMEncodeBase16Str(const ABinValueBytes: RawByteString): TGMString;
function GMDecodeBase16Str(const AValue: TGMString): RawByteString;

function GMEncodeBase32Str(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
function GMDecodeBase32Str(const AValue: TGMString): RawByteString;

function GMEncodeBase32HexStr(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
function GMDecodeBase32HexStr(const AValue: TGMString): RawByteString;

function GMEncodeBase64Str(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
function GMDecodeBase64Str(const AValue: TGMString): RawByteString;

function GMEncodeBase64UrlStr(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
function GMDecodeBase64UrlStr(const AValue: TGMString): RawByteString;


procedure BuildDecodeTable(var ADecodeTable: RawByteString; const AAlphabet: TGMString);

function GMEncodeBaseXX(ADataPtr: PByte; const ADataSizeInBytes: PtrUInt; const AAlphabet: TGMString; const ABitwidth: Integer; const AAppendPadding: Boolean; const APadAlignment: Integer): TGMString; overload;
function GMEncodeBaseXX(const ABinValueBytes: RawByteString; const AAlphabet: TGMString; const ABitwidth: Integer; const AAppendPadding: Boolean; const APadAlignment: Integer): TGMString; overload;
function GMDecodeBaseXX(const AValue: TGMString; const ADecodeTable: RawByteString; const ABitLen: Integer; const ACallingName: TGMString): RawByteString;


type

  EGMCharCodingError = class(EGMException);


const

  cBase16Alphabet =    '0123456789ABCDEF';
  cBase32Alphabet =    'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567';
  cBase32HexAlphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUV';
  cBase64Alphabet =    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
  cBase64UrlAlphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_';


implementation

uses GMIntf;

resourcestring

  RStrInvalidInputChar = 'Invalid input character "%s" at position %d';


{ ------------------ }
{ ---- Encoding ---- }
{ ------------------ }

function GMEncodeBaseXX(ADataPtr: PByte; const ADataSizeInBytes: PtrUInt; const AAlphabet: TGMString; const ABitwidth: Integer; const AAppendPadding: Boolean; const APadAlignment: Integer): TGMString; overload;
const cBitMask: array [0..16] of Word= (0, 1, 3, 7, $f, $1f, $3f, $7f, $ff, $1ff, $3ff, $7ff, $fff, $1fff, $3fff, $7fff, $ffff);
var inChPos, val, bits: Integer;
begin
  SetLength(Result, 0); val := 0; bits := 0;

  for inChPos := 1 to ADataSizeInBytes do
   begin
    val := val shl 8;
    val := val or ADataPtr^; // Ord(ABinValueBytes[inChPos]);
    Inc(bits, 8);

    while bits >= ABitwidth do
     begin
//    bits := (bits - ABitwidth);
      Dec(bits, ABitwidth);
      Result := Result + AAlphabet[(val shr bits) + 1];
      val := val and cBitMask[bits];
     end;
    Inc(ADataPtr);
   end;

  if bits > 0 then
   begin
    val := val shl (ABitwidth - bits);
    Result := Result + AAlphabet[val + 1];
   end;

  if AAppendPadding and (APadAlignment > 0) then
    for inChPos := Length(Result) to GMAlignedValue(Length(Result), APadAlignment) - 1 do  Result := Result + '=';
end;

function GMEncodeBaseXX(const ABinValueBytes: RawByteString; const AAlphabet: TGMString; const ABitwidth: Integer; const AAppendPadding: Boolean; const APadAlignment: Integer): TGMString;
//const cBitMask: array [0..16] of Word= (0, 1, 3, 7, $f, $1f, $3f, $7f, $ff, $1ff, $3ff, $7ff, $fff, $1fff, $3fff, $7fff, $ffff);
//var inChPos, val, bits: Integer;
begin
  Result := GMEncodeBaseXX(PByte(ABinValueBytes), Length(ABinValueBytes), AAlphabet, ABitwidth, AAppendPadding, APadAlignment);
//  SetLength(Result, 0); val := 0; bits := 0;
//
//  for inChPos := 1 to Length(ABinValueBytes) do
//   begin
//    val := val shl 8;
//    val := val or Ord(ABinValueBytes[inChPos]);
//    Inc(bits, 8);
//
//    while bits >= ABitwidth do
//     begin
////    bits := (bits - ABitwidth);
//      Dec(bits, ABitwidth);
//      Result := Result + AAlphabet[(val shr bits) + 1];
//      val := val and cBitMask[bits];
//     end;
//   end;
//
//  if bits > 0 then
//   begin
//    val := val shl (ABitwidth - bits);
//    Result := Result + AAlphabet[val + 1];
//   end;
//
//  if AAppendPadding and (APadAlignment > 0) then
//    for inChPos := Length(Result) to GMAlignedValue(Length(Result), APadAlignment) - 1 do  Result := Result + '=';
end;

function GMEncodeBase16(const ADataPtr: PByte; const ADataSizeInBytes: PtrUInt): TGMString;
begin
  Result := GMEncodeBaseXX(ADataPtr, ADataSizeInBytes, cBase16Alphabet, 4, False, 0);
end;

function GMEncodeBase16Str(const ABinValueBytes: RawByteString): TGMString;
begin
  Result := GMEncodeBaseXX(ABinValueBytes, cBase16Alphabet, 4, False, 0);
end;

function GMEncodeBase32Str(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
begin
  Result := GMEncodeBaseXX(ABinValueBytes, cBase32Alphabet, 5, AAddPadding, 8);
end;

function GMEncodeBase32HexStr(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
begin
  Result := GMEncodeBaseXX(ABinValueBytes, cBase32HexAlphabet, 5, AAddPadding, 8);
end;

function GMEncodeBase64Str(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
begin
  Result := GMEncodeBaseXX(ABinValueBytes, cBase64Alphabet, 6, AAddPadding, 4);
end;

function GMEncodeBase64UrlStr(const ABinValueBytes: RawByteString; const AAddPadding: Boolean = True): TGMString;
begin
  Result := GMEncodeBaseXX(ABinValueBytes, cBase64UrlAlphabet, 6, AAddPadding, 4);
end;


{ ------------------ }
{ ---- Decoding ---- }
{ ------------------ }

var

  vCSBuildDecodeTable: IGMCriticalSection = nil;

  vBase16DecodeTable: RawByteString = '';
  vBase32DecodeTable: RawByteString = '';
  vBase32HexDecodeTable: RawByteString = '';
  vBase64DecodeTable: RawByteString = '';
  vBase64UrlDecodeTable: RawByteString = '';


procedure BuildDecodeTable(var ADecodeTable: RawByteString; const AAlphabet: TGMString);
var i: Integer;
begin
  GMEnterCriticalSection(vCSBuildDecodeTable);
  try
   if Length(ADecodeTable) <> 256 then
    begin
     SetLength(ADecodeTable, 256);
     FillByte(PAnsiChar(ADecodeTable)^, Length(ADecodeTable), 0);
     for i:=1 to Length(AAlphabet) do ADecodeTable[Byte(Ord(AAlphabet[i]))+1] := Chr(i);
    end;
  finally
   GMLeaveCriticalSection(vCSBuildDecodeTable);
  end;
end;

function GMDecodeBaseXX(const AValue: TGMString; const ADecodeTable: RawByteString; const ABitLen: Integer; const ACallingName: TGMString): RawByteString;
var i, val, bits, chVal: Integer; decodeVal: Byte;
  procedure InvalidInputError;
  begin
    raise EGMCharCodingError.ObjError(GMFormat(GMStringJoin(ACallingName, ': ', RStrInvalidInputChar), [AValue[i], i]), nil, ACallingName);
  end;
begin
  SetLength(Result, 0); val := 0; bits := 0;

  for i:=1 to Length(AValue) do
   begin
    chVal := Ord(AValue[i]);
    if chVal > 255 then InvalidInputError;
    decodeVal := Ord(ADecodeTable[chVal+1]);

    if decodeVal > 0 then
     begin
      val := val shl ABitLen;
      val := val or (decodeVal - 1);

      bits := bits + ABitLen;

      while bits >= 8 do
       begin
        bits := bits - 8;
        Result := Result + AnsiChar((val shr bits)); //  and $FF
       end;
     end
    else case AValue[i] of
          //#10, #13: ; Nothing, skip line breaks?
          '=': Break;
          else InvalidInputError;
         end;
   end;
end;

function GMDecodeBase16Str(const AValue: TGMString): RawByteString;
begin
  BuildDecodeTable(vBase16DecodeTable, cBase16Alphabet);
  Result := GMDecodeBaseXX(GMUpperCase(AValue), vBase16DecodeTable, 4, 'GMDecodeBase16Str');
end;

function GMDecodeBase32Str(const AValue: TGMString): RawByteString;
begin
  BuildDecodeTable(vBase32DecodeTable, cBase32Alphabet);
  Result := GMDecodeBaseXX(GMUpperCase(AValue), vBase32DecodeTable, 5, 'GMDecodeBase32Str');
end;

function GMDecodeBase32HexStr(const AValue: TGMString): RawByteString;
begin
  BuildDecodeTable(vBase32HexDecodeTable, cBase32HexAlphabet);
  Result := GMDecodeBaseXX(GMUpperCase(AValue), vBase32HexDecodeTable, 5, 'GMDecodeBase32HexStr');
end;

function GMDecodeBase64Str(const AValue: TGMString): RawByteString;
begin
  BuildDecodeTable(vBase64DecodeTable, cBase64Alphabet);
  Result := GMDecodeBaseXX(AValue, vBase64DecodeTable, 6, 'GMDecodeBase64Str'); // <- No Upper case here! Base 64 alphabet distunguishes upper and lower charachters!
end;

function GMDecodeBase64UrlStr(const AValue: TGMString): RawByteString;
begin
  BuildDecodeTable(vBase64UrlDecodeTable, cBase64UrlAlphabet);
  Result := GMDecodeBaseXX(AValue, vBase64UrlDecodeTable, 6, 'GMDecodeBase64UrlStr'); // <- No Upper case here! Base 64 alphabet distunguishes upper and lower charachters!
end;


initialization

  vCSBuildDecodeTable := TGMCriticalSection.Create;

end.