Unicode-Codierung erkennen |
|
| Autor | Christian Rehn |
|---|---|
| System | Win9x, WinNT, Win2000, WinXP, Vista, Win7 |
| Ab Delphi-Version | Delphi 1 |
| Letzte Änderung | 05.05.2011 |
Eine mit Unicode kodierte Textdatei enthält einen Header, die so genannte BOM (Byte Order Mark), die angibt, um welche konkrete Unicode-Kodierung (UTF8, UTF16BigEndian etc.) es sich handelt. Die BOM lässt sich wie folgt auslesen:
{ Result type of GetUnicodeTypeFromFile: one member per Unicode encoding whose
  byte order mark (BOM) the function checks for, plus btNoUnicode, which the
  function returns when none of those BOMs matches. }
type
TUnicodeType = (btUTF8, btUTF16BigEndian, btUTF16LittleEndian, btUTF32BigEndian,
btUTF32LittleEndian, btNoUnicode);
...
{ Determines which Unicode encoding a file announces through its byte order
  mark (BOM).

  AFileName: path of the file to inspect.

  Returns the TUnicodeType member matching the BOM found at the very start of
  the file, or btNoUnicode when the file is too short for, or does not start
  with, any of the checked BOMs.

  Raises whatever exception TFileStream.Create raises when the file cannot be
  opened (for example EFOpenError). }
function GetUnicodeTypeFromFile(AFileName: string): TUnicodeType;
var
  stream: TFileStream;
  BOM: array[0..3] of Byte;
  BytesRead: Integer;
begin
  { A fixed 4-byte buffer holds the longest BOM checked below (UTF-32).
    Raw bytes are compared instead of AnsiString literals so the result never
    depends on string code page conversions. }
  FillChar(BOM, SizeOf(BOM), 0);

  { fmShareDenyWrite lets other readers keep the file open; plain fmOpenRead
    would request exclusive access. }
  stream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
  try
    { Read may deliver fewer than 4 bytes for very short files; remember how
      many bytes are valid so filler bytes are never mistaken for a BOM. }
    BytesRead := stream.Read(BOM, SizeOf(BOM));
  finally
    stream.Free;
  end;

  { The UTF-32 LE check must come before UTF-16 LE because the UTF-32 LE BOM
    (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE). }
  if (BytesRead >= 3) and (BOM[0] = $EF) and (BOM[1] = $BB) and (BOM[2] = $BF) then
    Result := btUTF8
  else if (BytesRead >= 4) and (BOM[0] = $00) and (BOM[1] = $00) and
    (BOM[2] = $FE) and (BOM[3] = $FF) then
    Result := btUTF32BigEndian
  else if (BytesRead >= 4) and (BOM[0] = $FF) and (BOM[1] = $FE) and
    (BOM[2] = $00) and (BOM[3] = $00) then
    Result := btUTF32LittleEndian
  else if (BytesRead >= 2) and (BOM[0] = $FE) and (BOM[1] = $FF) then
    Result := btUTF16BigEndian
  else if (BytesRead >= 2) and (BOM[0] = $FF) and (BOM[1] = $FE) then
    Result := btUTF16LittleEndian
  else
    Result := btNoUnicode;
end;
{ Result type of GetUnicodeTypeFromFile: one member per Unicode encoding whose
  byte order mark (BOM) the function checks for, plus btNoUnicode, which the
  function returns when none of those BOMs matches. }
TUnicodeType = (btUTF8, btUTF16BigEndian, btUTF16LittleEndian, btUTF32BigEndian,
btUTF32LittleEndian, btNoUnicode);
...
{ Determines which Unicode encoding a file announces through its byte order
  mark (BOM).

  AFileName: path of the file to inspect.

  Returns the TUnicodeType member matching the BOM found at the very start of
  the file, or btNoUnicode when the file is too short for, or does not start
  with, any of the checked BOMs.

  Raises whatever exception TFileStream.Create raises when the file cannot be
  opened (for example EFOpenError). }
function GetUnicodeTypeFromFile(AFileName: string): TUnicodeType;
var
  stream: TFileStream;
  BOM: array[0..3] of Byte;
  BytesRead: Integer;
begin
  { A fixed 4-byte buffer holds the longest BOM checked below (UTF-32).
    Raw bytes are compared instead of AnsiString literals so the result never
    depends on string code page conversions. }
  FillChar(BOM, SizeOf(BOM), 0);

  { fmShareDenyWrite lets other readers keep the file open; plain fmOpenRead
    would request exclusive access. }
  stream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
  try
    { Read may deliver fewer than 4 bytes for very short files; remember how
      many bytes are valid so filler bytes are never mistaken for a BOM. }
    BytesRead := stream.Read(BOM, SizeOf(BOM));
  finally
    stream.Free;
  end;

  { The UTF-32 LE check must come before UTF-16 LE because the UTF-32 LE BOM
    (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE). }
  if (BytesRead >= 3) and (BOM[0] = $EF) and (BOM[1] = $BB) and (BOM[2] = $BF) then
    Result := btUTF8
  else if (BytesRead >= 4) and (BOM[0] = $00) and (BOM[1] = $00) and
    (BOM[2] = $FE) and (BOM[3] = $FF) then
    Result := btUTF32BigEndian
  else if (BytesRead >= 4) and (BOM[0] = $FF) and (BOM[1] = $FE) and
    (BOM[2] = $00) and (BOM[3] = $00) then
    Result := btUTF32LittleEndian
  else if (BytesRead >= 2) and (BOM[0] = $FE) and (BOM[1] = $FF) then
    Result := btUTF16BigEndian
  else if (BytesRead >= 2) and (BOM[0] = $FF) and (BOM[1] = $FE) then
    Result := btUTF16LittleEndian
  else
    Result := btNoUnicode;
end;