1
0
mirror of https://github.com/SineVector241/VoiceCraft-MCBE_Proximity_Chat.git synced 2024-11-24 14:36:13 +00:00
VoiceCraft-MCBE_Proximity_Chat/ATL/AudioData/IO/CAF.cs
2024-07-13 11:16:08 +10:00

409 lines
17 KiB
C#

using Commons;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using static ATL.ChannelsArrangements;
using static ATL.TagData;
namespace ATL.AudioData.IO
{
/// <summary>
/// Class for Apple Core Audio files manipulation (extension : .CAF)
///
/// Implementation notes
///
/// 1. "Functional" metadata reading and writing
///
/// Due to the rarity of CAF files with actual metadata (i.e. strg or info chunks) :
/// - the implementation of metadata reading is experimental, as in "theroretically working, but untested"
/// - there is no implementation for metadata writing
///
/// Anyone who wants these features and has "rich" CAF files is welcome to open a new github issue about it.
/// </summary>
class CAF : MetaDataIO, IAudioDataIO
{
private const uint CAF_MAGIC_NUMBER = 1667327590; // 'caff'
public const string CHUNK_AUDIO_DESC = "desc";
public const string CHUNK_PACKET_TABLE = "pakt";
public const string CHUNK_CHANNEL_LAYOUT = "chan";
public const string CHUNK_COOKIE = "kuki";
public const string CHUNK_STRINGS = "strg";
public const string CHUNK_INFO = "info";
public const string CHUNK_UMID = "umid";
public const string CHUNK_AUDIO = "data";
public const string CHUNK_PADDING = "free";
// Mapping between CAF channels layout codes and ATL ChannelsArrangement
private static readonly Dictionary<uint, ChannelsArrangement> channelsMapping = new Dictionary<uint, ChannelsArrangement>() {
{ (100 << 16) | 1, MONO },
{ (101 << 16) | 2, STEREO },
{ (102 << 16) | 2, STEREO }, // Stereo / headphone playback
{ (103 << 16) | 2, STEREO_LEFT_RIGHT_TOTAL },
{ (104 << 16) | 2, JOINT_STEREO_MID_SIDE },
{ (105 << 16) | 2, STEREO_XY },
{ (106 << 16) | 2, STEREO_BINAURAL },
{ (107 << 16) | 4, AMBISONIC_B },
{ (108 << 16) | 4, QUAD },
{ (109 << 16) | 5, PENTAGONAL },
{ (110 << 16) | 6, HEXAGONAL },
{ (111 << 16) | 8, OCTAGONAL },
{ (112 << 16) | 8, CUBE },
{ (113 << 16) | 3, ISO_3_0_0 },
{ (114 << 16) | 3, ISO_3_0_0 }, // ATL doesn't differentiate multiple channel combinations
{ (115 << 16) | 4, LRCS },
{ (116 << 16) | 4, LRCS }, // ATL doesn't differentiate multiple channel combinations
{ (117 << 16) | 5, ISO_3_2_0 },
{ (118 << 16) | 5, ISO_3_2_0 }, // ATL doesn't differentiate multiple channel combinations
{ (119 << 16) | 5, ISO_3_2_0 }, // ATL doesn't differentiate multiple channel combinations
{ (120 << 16) | 5, ISO_3_2_0 }, // ATL doesn't differentiate multiple channel combinations
{ (121 << 16) | 6, ISO_3_2_1 },
{ (122 << 16) | 6, ISO_3_2_1 }, // ATL doesn't differentiate multiple channel combinations
{ (123 << 16) | 6, ISO_3_2_1 }, // ATL doesn't differentiate multiple channel combinations
{ (124 << 16) | 6, ISO_3_2_1 }, // ATL doesn't differentiate multiple channel combinations
{ (125 << 16) | 7, MPEG_6_1 },
{ (126 << 16) | 8, MPEG_7_1 },
{ (127 << 16) | 8, MPEG_7_1 }, // ATL doesn't differentiate multiple channel combinations
{ (128 << 16) | 8, MPEG_7_1 }, // ATL doesn't differentiate multiple channel combinations
{ (129 << 16) | 8, MPEG_7_1 }, // ATL doesn't differentiate multiple channel combinations
{ (130 << 16) | 8, SMPTE_DTV },
{ (131 << 16) | 3, ITU_2_1 },
{ (132 << 16) | 4, ITU_2_2 },
{ (133 << 16) | 3, DVD_4 },
{ (134 << 16) | 4, DVD_5 },
{ (135 << 16) | 5, DVD_6 },
{ (136 << 16) | 4, DVD_10 },
{ (137 << 16) | 5, DVD_11 },
{ (138 << 16) | 5, DVD_18 },
{ (139 << 16) | 6, AUDIOUNIT_6_0 },
{ (140 << 16) | 7, AUDIOUNIT_7_0 },
{ (141 << 16) | 6, AAC_6_0 },
{ (142 << 16) | 7, AAC_6_1 },
{ (143 << 16) | 7, AAC_7_0 },
{ (144 << 16) | 8, AAC_OCTAGONAL },
{ (145 << 16) | 16, TMH_10_2_STD },
{ (146 << 16) | 21, TMH_10_2_FULL }
};
// Mapping between CAF format ID and format names
private static readonly Dictionary<string, KeyValuePair<int, string>> formatsMapping = new Dictionary<string, KeyValuePair<int, string>>()
{
{ "none", new KeyValuePair<int, string>(0,"") },
{ "lpcm", new KeyValuePair<int, string>(1,"Linear PCM") },
{ "ima4", new KeyValuePair<int, string>(2,"Apple IMA 4:1 ADPCM") },
{ "aac ", new KeyValuePair<int, string>(3,"MPEG-4 AAC") },
{ "MAC3", new KeyValuePair<int, string>(4,"MACE 3:1") },
{ "MAC6", new KeyValuePair<int, string>(5,"MACE 6:1") },
{ "ulaw", new KeyValuePair<int, string>(6,"μLaw 2:1") },
{ "alaw", new KeyValuePair<int, string>(7,"aLaw 2:1") },
{ ".mp1", new KeyValuePair<int, string>(8,"MPEG-1 or 2, Layer 1") },
{ ".mp2", new KeyValuePair<int, string>(9,"MPEG-1 or 2, Layer 2") },
{ ".mp3", new KeyValuePair<int, string>(10,"MPEG-1 or 2, Layer 3") },
{ "alac", new KeyValuePair<int, string>(11,"Apple Lossless") }
};
// Mapping between CAF information keys and ATL frame codes
private static readonly Dictionary<string, Field> frameMapping = new Dictionary<string, Field>() {
{ "artist", Field.ARTIST },
{ "album", Field.ALBUM },
{ "track number", Field.TRACK_NUMBER },
{ "year", Field.PUBLISHING_DATE },
{ "composer", Field.COMPOSER },
{ "genre", Field.GENRE },
{ "title", Field.TITLE },
{ "recorded date", Field.RECORDING_DATE },
{ "comments", Field.COMMENT },
{ "copyright", Field.COPYRIGHT }
};
// Private declarations
private Format containerAudioFormat;
private KeyValuePair<int, string> containeeAudioFormat;
private uint sampleRate;
private bool isVbr;
private uint channelsPerFrame;
private uint bitsPerChannel;
double secondsPerByte;
// ---------- INFORMATIVE INTERFACE IMPLEMENTATIONS & MANDATORY OVERRIDES
public bool IsVBR => isVbr;
public Format AudioFormat
{
get
{
containerAudioFormat = new Format(containerAudioFormat);
containerAudioFormat.Name += " / " + containeeAudioFormat.Value;
containerAudioFormat.ID += containeeAudioFormat.Key;
return containerAudioFormat;
}
}
public int CodecFamily { get; private set; }
public string FileName { get; }
public double BitRate { get; private set; }
public int BitDepth => (int)(bitsPerChannel * channelsPerFrame);
public double Duration { get; private set; }
public int SampleRate => (int)sampleRate;
public ChannelsArrangement ChannelsArrangement { get; private set; }
public List<MetaDataIOFactory.TagType> GetSupportedMetas()
{
return new List<MetaDataIOFactory.TagType> { MetaDataIOFactory.TagType.NATIVE };
}
protected override int getDefaultTagOffset()
{
return TO_BUILTIN;
}
protected override MetaDataIOFactory.TagType getImplementedTagType()
{
return MetaDataIOFactory.TagType.NATIVE;
}
protected override Field getFrameMapping(string zone, string ID, byte tagVersion)
{
Field supportedMetaId = Field.NO_FIELD;
if (frameMapping.TryGetValue(ID, out var value)) supportedMetaId = value;
return supportedMetaId;
}
public long AudioDataOffset { get; set; }
public long AudioDataSize { get; set; }
// ---------- CONSTRUCTORS & INITIALIZERS
protected void resetData()
{
sampleRate = 0;
Duration = 0;
BitRate = 0;
isVbr = false;
CodecFamily = 0;
bitsPerChannel = 0;
channelsPerFrame = 0;
ChannelsArrangement = null;
secondsPerByte = 0;
AudioDataOffset = -1;
AudioDataSize = 0;
}
/// <summary>
/// Constructor
/// </summary>
public CAF(string filePath, Format format)
{
this.FileName = filePath;
containerAudioFormat = format;
resetData();
}
public static bool IsValidHeader(byte[] data)
{
return CAF_MAGIC_NUMBER == StreamUtils.DecodeBEUInt32(data);
}
// ---------- SUPPORT METHODS
private bool readFileHeader(BufferedBinaryReader source)
{
if (!IsValidHeader(source.ReadBytes(4))) return false;
AudioDataOffset = source.Position - 4;
source.Seek(4, SeekOrigin.Current); // Useless here
return true;
}
private void readAudioDescriptionChunk(BufferedBinaryReader source)
{
double m_sampleRate = StreamUtils.DecodeBEDouble(source.ReadBytes(8)); // aka frames per second
string formatId = Utils.Latin1Encoding.GetString(source.ReadBytes(4));
source.Seek(4, SeekOrigin.Current); // format flags
uint bytesPerPacket = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
uint framesPerPacket = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
channelsPerFrame = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
bitsPerChannel = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
sampleRate = (uint)Math.Round(m_sampleRate);
// Compute audio duration
if (bytesPerPacket > 0)
{
double secondsPerPacket = framesPerPacket / m_sampleRate;
secondsPerByte = secondsPerPacket / bytesPerPacket;
// Duration will be determined using the size of 'data' chunk
}
else
{
secondsPerByte = 0;
isVbr = true;
// Duration will be dertermiend using data from the 'pakt' chunk
}
// Determine audio properties according to the format ID
CodecFamily = AudioDataIOFactory.CF_LOSSY;
switch (formatId)
{
case ("lpcm"):
case ("alac"):
CodecFamily = AudioDataIOFactory.CF_LOSSLESS;
break;
}
// Determine format
if (formatsMapping.ContainsKey(formatId))
containeeAudioFormat = formatsMapping[formatId];
else
containeeAudioFormat = formatsMapping["none"];
}
private void readChannelLayoutChunk(BufferedBinaryReader source)
{
uint channelLayout = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
// we don't need anything else
if (channelsMapping.ContainsKey(channelLayout)) ChannelsArrangement = channelsMapping[channelLayout];
}
// WARNING : EXPERIMENTAL / UNTESTED DUE TO THE LACK OF METADATA-RICH SAMPLE FILES
private void readStringChunk(BufferedBinaryReader source, string id, long chunkSize)
{
string cookieStr = Utils.Latin1Encoding.GetString(source.ReadBytes((int)chunkSize));
SetMetaField(id, cookieStr, true);
}
// WARNING : EXPERIMENTAL / UNTESTED DUE TO THE LACK OF METADATA-RICH SAMPLE FILES
private void readStringsChunk(BufferedBinaryReader source)
{
uint nbEntries = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
Dictionary<uint, long> stringIds = new Dictionary<uint, long>();
for (int i = 0; i < nbEntries; i++)
{
uint stringId = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
long stringOffset = StreamUtils.DecodeBEInt64(source.ReadBytes(8));
stringIds.Add(stringId, stringOffset);
}
long initialPos = source.Position;
string stringValue;
foreach (uint id in stringIds.Keys)
{
source.Seek(initialPos + stringIds[id], SeekOrigin.Begin);
stringValue = StreamUtils.ReadNullTerminatedString(source, Encoding.UTF8);
SetMetaField("str-" + id, stringValue, true);
}
}
// WARNING : EXPERIMENTAL / UNTESTED DUE TO THE LACK OF METADATA-RICH SAMPLE FILES
private void readInfoChunk(BufferedBinaryReader source, bool readAllMetaFrames)
{
uint nbEntries = StreamUtils.DecodeBEUInt32(source.ReadBytes(4));
for (int i = 0; i < nbEntries; i++)
{
string infoKey = StreamUtils.ReadNullTerminatedString(source, Encoding.UTF8);
string infoVal = StreamUtils.ReadNullTerminatedString(source, Encoding.UTF8);
SetMetaField(infoKey, infoVal, readAllMetaFrames);
}
}
private void readPaktChunk(BufferedBinaryReader source)
{
source.Seek(8, SeekOrigin.Current); // nbPackets
long nbFrames = StreamUtils.DecodeBEInt64(source.ReadBytes(8));
Duration = nbFrames * 1000d / sampleRate;
}
/// <inheritdoc/>
public bool Read(Stream source, AudioDataManager.SizeInfo sizeInfo, ReadTagParams readTagParams)
{
return read(source, readTagParams);
}
/// <inheritdoc/>
protected override bool read(Stream source, ReadTagParams readTagParams)
{
ResetData();
BufferedBinaryReader reader = new BufferedBinaryReader(source);
reader.Seek(0, SeekOrigin.Begin);
bool result = readFileHeader(reader);
long cursorPos = reader.Position;
long audioChunkSize = 0;
// Iterate through chunks
while (cursorPos < reader.Length)
{
string chunkType = Utils.Latin1Encoding.GetString(reader.ReadBytes(4));
long chunkSize = StreamUtils.DecodeBEInt64(reader.ReadBytes(8));
if (readTagParams.PrepareForWriting) structureHelper.AddZone(cursorPos, chunkSize + 12, chunkType);
switch (chunkType)
{
case CHUNK_AUDIO_DESC:
readAudioDescriptionChunk(reader);
break;
case CHUNK_CHANNEL_LAYOUT:
readChannelLayoutChunk(reader);
break;
case CHUNK_COOKIE:
case CHUNK_UMID:
if (readTagParams.PrepareForWriting || readTagParams.ReadAllMetaFrames) readStringChunk(reader, chunkType, chunkSize);
break;
case CHUNK_STRINGS:
if (readTagParams.PrepareForWriting || readTagParams.ReadAllMetaFrames) readStringsChunk(reader);
break;
case CHUNK_INFO:
readInfoChunk(reader, readTagParams.PrepareForWriting || readTagParams.ReadAllMetaFrames);
break;
case CHUNK_AUDIO:
AudioDataOffset = cursorPos;
audioChunkSize = chunkSize;
AudioDataSize = chunkSize + 12;
if (secondsPerByte > 0) Duration = chunkSize * secondsPerByte * 1000;
break;
case CHUNK_PACKET_TABLE:
if (Utils.ApproxEquals(secondsPerByte, 0)) readPaktChunk(reader);
break;
}
reader.Seek(cursorPos + chunkSize + 12, SeekOrigin.Begin);
cursorPos = reader.Position;
}
BitRate = audioChunkSize * 8d / Duration;
if (null == ChannelsArrangement)
{
if (1 == channelsPerFrame) ChannelsArrangement = MONO;
else if (2 == channelsPerFrame) ChannelsArrangement = STEREO;
else ChannelsArrangement = new ChannelsArrangement((int)channelsPerFrame, "Custom layout (" + channelsPerFrame + " channels)");
}
return result;
}
// WARNING : NOT IMPLEMENTED DUE TO THE LACK OF METADATA-RICH SAMPLE FILES
/// <inheritdoc/>
protected override int write(TagData tag, Stream s, string zone)
{
throw new NotImplementedException();
}
}
}