From 713fa11fdc30d54266d9c247c01ea16207368df8 Mon Sep 17 00:00:00 2001 From: mhwlng Date: Sat, 12 Dec 2020 11:35:51 +0100 Subject: [PATCH] change handling of EDSM json data to prevent out of memory error (json data was 50MB, is now 1.4GB) --- Elite/Elite.csproj | 4 +- Elite/Properties/AssemblyInfo.cs | 4 +- Elite/packages.config | 2 +- ImportData/Program.cs | 105 +++++++++++++++++++------------ 4 files changed, 71 insertions(+), 44 deletions(-) diff --git a/Elite/Elite.csproj b/Elite/Elite.csproj index 4e535b7..3fc9814 100644 --- a/Elite/Elite.csproj +++ b/Elite/Elite.csproj @@ -56,8 +56,8 @@ ..\packages\Costura.Fody.4.1.0\lib\net40\Costura.dll - - ..\packages\CsvHelper.17.0.1\lib\net47\CsvHelper.dll + + ..\packages\CsvHelper.18.0.0\lib\net47\CsvHelper.dll ..\packages\Hardcodet.NotifyIcon.Wpf.1.0.8\lib\net451\Hardcodet.Wpf.TaskbarNotification.dll diff --git a/Elite/Properties/AssemblyInfo.cs b/Elite/Properties/AssemblyInfo.cs index 3f44022..998bf9c 100644 --- a/Elite/Properties/AssemblyInfo.cs +++ b/Elite/Properties/AssemblyInfo.cs @@ -31,7 +31,7 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.5.8.0")] -[assembly: AssemblyFileVersion("1.5.8.0")] +[assembly: AssemblyVersion("1.5.9.0")] +[assembly: AssemblyFileVersion("1.5.9.0")] [assembly: log4net.Config.XmlConfigurator(Watch = true)] \ No newline at end of file diff --git a/Elite/packages.config b/Elite/packages.config index 90096e9..af825c5 100644 --- a/Elite/packages.config +++ b/Elite/packages.config @@ -1,7 +1,7 @@  - + diff --git a/ImportData/Program.cs b/ImportData/Program.cs index 453f394..eba1793 100644 --- a/ImportData/Program.cs +++ b/ImportData/Program.cs @@ -13,34 +13,45 @@ namespace ImportData { - class Program + public static class JsonReaderExtensions { - private static readonly ILog Log = - LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); + private static string GetExePath() + { + var strExeFilePath = System.Reflection.Assembly.GetExecutingAssembly().Location; + return Path.GetDirectoryName(strExeFilePath); + } - private static byte[] Decompress(byte[] gzip) + public static IEnumerable ParseJson(string path) { - using (var stream = new GZipStream(new MemoryStream(gzip), - CompressionMode.Decompress)) + path = Path.Combine(GetExePath(), path); + + if (File.Exists(path)) { - const int size = 100000; - var buffer = new byte[size]; - using (var memory = new MemoryStream()) + var serializer = new JsonSerializer(); + using (var s = File.Open(path, FileMode.Open)) { - int count; - do + using (var sr = new StreamReader(s)) { - count = stream.Read(buffer, 0, size); - if (count > 0) + using (var reader = new JsonTextReader(sr)) { - memory.Write(buffer, 0, count); + while (reader.Read()) + { + if (reader.TokenType == JsonToken.StartObject) + { + yield return serializer.Deserialize(reader); + } + } } } - while (count > 0); - return memory.ToArray(); } } } + } + + class Program + { + private static readonly ILog Log = + LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); private static string GetExePath() { @@ -136,32 +147,43 @@ private static bool NeedToUpdateFile(string path, int minutes) return true; } - - private static string DownloadJson(string path, string url, ref bool wasUpdated) + private static void DownloadJson(string path, string url, ref bool wasUpdated) { path = Path.Combine(GetExePath(), path); DeleteExpiredFile(path, 1440); - if (File.Exists(path)) - { - return File.ReadAllText(path); - } - else + if (!File.Exists(path)) { using (var client = new WebClient()) { client.Headers[HttpRequestHeader.AcceptEncoding] = "gzip"; - var data = client.DownloadData(url); - var decompress = Decompress(data); - - File.WriteAllBytes(path, decompress); - wasUpdated = true; + using (var compressedStream = new MemoryStream(client.DownloadData(url))) + { + using (var stream = new GZipStream(compressedStream, CompressionMode.Decompress)) + { + using (BinaryWriter binWriter = + new BinaryWriter(File.Open(path, FileMode.Create))) + { + const int size = 100000; + var buffer = new byte[size]; + int count; + do + { + count = stream.Read(buffer, 0, size); + if (count > 0) + { + binWriter.Write(buffer, 0, count); + } + } while (count > 0); + } + } + } + } - return File.ReadAllText(path); // not efficient, but gets around out of memory error + wasUpdated = true; - } } } @@ -636,30 +658,33 @@ static void Main(string[] args) try { List stationsEDSM = null; + List stationsEDDBList = null; Dictionary stationsEDDB; + List populatedSystemsEDDBList; Dictionary populatedSystemsEDDBbyEdsmId; var wasAnyUpdated = false; Console.WriteLine("downloading populated systems from EDDB"); - var jsonPopulatedsystemsEDDBText = DownloadJson(@"Data\populatedsystemsEDDB.json", "https://eddb.io/archive/v6/systems_populated.json", ref wasAnyUpdated); + DownloadJson(@"Data\populatedsystemsEDDB.json", "https://eddb.io/archive/v6/systems_populated.json", ref wasAnyUpdated); Console.WriteLine("downloading station list from EDSM"); - var jsonStationsEDSMText = DownloadJson(@"Data\stationsEDSM.json", "https://www.edsm.net/dump/stations.json.gz", ref wasAnyUpdated); + DownloadJson(@"Data\stationsEDSM.json", "https://www.edsm.net/dump/stations.json.gz", ref wasAnyUpdated); Console.WriteLine("downloading station list from EDDB"); - var jsonStationsEDDBText = DownloadJson(@"Data\stationsEDDB.json", "https://eddb.io/archive/v6/stations.json", ref wasAnyUpdated); + DownloadJson(@"Data\stationsEDDB.json", "https://eddb.io/archive/v6/stations.json", ref wasAnyUpdated); Console.WriteLine("checking station and system data"); + populatedSystemsEDDBList = JsonReaderExtensions.ParseJson(@"Data\populatedsystemsEDDB.json").ToList(); + if (NeedToUpdateFile(@"Data\cnbsystems.json", 1440)) { // there are multiple stations with the same name ??? - var populatedSystemsEDDBbyName = JsonConvert - .DeserializeObject>(jsonPopulatedsystemsEDDBText) + var populatedSystemsEDDBbyName = populatedSystemsEDDBList .GroupBy(x => x.Name).Select(x => x.First()) .ToDictionary(x => x.Name); @@ -668,20 +693,22 @@ static void Main(string[] args) if (wasAnyUpdated || NeedToUpdateFile(@"Data\painitestations.json", 15)) { - populatedSystemsEDDBbyEdsmId = JsonConvert - .DeserializeObject>(jsonPopulatedsystemsEDDBText) + populatedSystemsEDDBbyEdsmId = populatedSystemsEDDBList .Where(x => x.EdsmId != null) .ToDictionary(x => (int) x.EdsmId); - stationsEDSM = JsonConvert.DeserializeObject>(jsonStationsEDSMText); + + stationsEDDBList = JsonReaderExtensions.ParseJson(@"Data\stationsEDDB.json").ToList(); // there are multiple stations with the same name ??? - stationsEDDB = JsonConvert.DeserializeObject>(jsonStationsEDDBText) + stationsEDDB = stationsEDDBList .GroupBy(x => x.Name).Select(x => x.First()) .ToDictionary(x => x.Name); Console.WriteLine("looking up additional EDDB station information for all stations"); + stationsEDSM = JsonReaderExtensions.ParseJson(@"Data\stationsEDSM.json").ToList(); + stationsEDSM.ForEach(z => { if (stationsEDDB.ContainsKey(z.Name))