from csv import reader, writer
from json import dumps


def readdepcsv(incsvfn, cols=8):
    """Read a departures CSV file and pad short rows with empty strings.

    Args:
        incsvfn: Path of the CSV file to read.
        cols: Minimum number of fields per returned row. Rows with fewer
            fields are right-padded with ``''``; longer rows are returned
            unchanged.

    Returns:
        A list with one list of strings per CSV record. A blank line in the
        file yields a row consisting only of empty strings.
    """
    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields survive untouched. UTF-8 matches the encoding this script
    # uses when it writes the report.
    with open(incsvfn, 'r', newline='', encoding='utf-8') as f:
        return [row + [''] * (cols - len(row)) for row in reader(f)]


def _is_excluded(stop, line, direction):
    """Return True if a departure must be left out of the report."""
    if stop == "Am Schultenhof":
        return True
    if (line == "594" and direction == "Schwerte, Bahnhof"
            and stop in ("Boele Markt", "Kabel Bf")):
        return True
    # Lines whose name starts with IC/S/RE/RB/U are dropped. Names starting
    # with 'SB' would match the 'S' prefix too, so they are explicitly kept.
    return (not line.startswith('SB')
            and line.startswith(('IC', 'S', 'RE', 'RB', 'U')))


def linedict(deps):
    """Group departure rows into a nested dictionary.

    Each row is indexed positionally: 0 = observation timestamp
    ("<date> <time>", split on whitespace), 1 = stop name, 3 = line,
    4 = direction, 5 = departure time, 6 = real-time status,
    7 = general attribute. Field 2 is not used; stops are keyed by name only.

    Args:
        deps: Iterable of rows (sequences of at least 8 strings), e.g. the
            result of ``readdepcsv``.

    Returns:
        ``{line: {direction: {stop: {deptime: {seendate: [(seentime,
        rtstatus, genattr), ...]}}}}}`` built from plain dicts and lists.

    Raises:
        ValueError: If a non-blank row's first field does not consist of
            exactly two whitespace-separated parts.
    """
    lines = {}
    for dep in deps:
        # A blank CSV line arrives as a row of empty strings (or an empty
        # row); it carries no data and would otherwise crash the timestamp
        # unpacking below.
        if not any(dep):
            continue
        seendate, seentime = dep[0].split()
        stop = dep[1]
        line = dep[3]
        direction = dep[4]
        deptime = dep[5]
        if _is_excluded(stop, line, direction):
            continue
        observations = (
            lines.setdefault(line, {})
            .setdefault(direction, {})
            .setdefault(stop, {})
            .setdefault(deptime, {})
            .setdefault(seendate, [])
        )
        observations.append((seentime, dep[6], dep[7]))
    return lines


def _daily_trip_estimate(lines, dates):
    """Build the per-day estimate section of the report.

    For every date and every line/direction, the estimate is the largest
    number of distinct departure times recorded on that date at any single
    stop. Line/direction pairs without any entry on the date are omitted.
    """
    parts = []
    grand_total = 0
    for date in dates:
        parts.append("\n" + date + "\n")
        day_total = 0
        for line in sorted(lines):
            for direction in sorted(lines[line]):
                stops = lines[line][direction]
                busiest = max(
                    (
                        sum(1 for seen in deptimes.values() if date in seen)
                        for deptimes in stops.values()
                    ),
                    default=0,
                )
                if busiest:
                    day_total += busiest
                    parts.append(f"{busiest}\t{line}\t{direction}\n")
        parts.append(f"Summe {date}: {day_total}\n")
        grand_total += day_total
    parts.append(f"\nSumme komplett: {grand_total}")
    return "".join(parts)


def linedictstr(lines):
    """Render the nested departure dictionary as a text report.

    The report consists of a legend, overall and per-day counts, two fixed
    remarks, the per-day estimate per line/direction, and finally the full
    listing (line -> direction, stop, departure time, dates with the
    observation times in parentheses). The report text itself is German.

    Args:
        lines: Nested dictionary as returned by ``linedict``.

    Returns:
        The complete report as a single string.
    """
    datecount = {}  # date -> number of (line, direction, stop, deptime) entries
    count = 0       # number of (line, direction, stop, deptime) entries overall
    listing = []
    for line in sorted(lines):
        for direction in sorted(lines[line]):
            stops = lines[line][direction]
            listing.append("\n" + line + " -> " + direction + "\n")
            # Stops are ordered by their earliest departure time, not by name.
            for stop in sorted(stops, key=lambda name: min(stops[name])):
                listing.append("- " + stop + "\n")
                for deptime in sorted(stops[stop]):
                    count += 1
                    listing.append("-- " + deptime + "\n")
                    seen = stops[stop][deptime]
                    for seendate in sorted(seen):
                        datecount[seendate] = datecount.get(seendate, 0) + 1
                        seentimes = ",".join(s[0] for s in sorted(seen[seendate]))
                        listing.append("--- " + seendate + " (" + seentimes + ")\n")

    estimate = _daily_trip_estimate(lines, sorted(datecount))

    report = [
        "@ Linie -> Richtung\n@ - Haltestelle\n@ -- Abfahrtszeit\n@ --- Ausfalltage (-zeiten)\n\n",
        f"@ Gesamtanzahl ausgefallener Abfahrten: {sum(datecount.values())}\n",
        f"@ tagesübergreifend: {count}\n\n@ nach Tag:\n",
    ]
    report.extend(f"@ {date}: {dc}\n" for date, dc in sorted(datecount.items()))
    report.append("\nzusätzlich ausgeschlossen: Boele Markt&Kabel Bf (594->Schwerte Bf.) und Am Schultenhof\n\n")
    report.append("Daten vom 2019-01-14 sind leider nicht vollständig.\n\n")
    report.append("grob geschätzte Fahrtenanzahl:\n" + estimate + "\n\nListe:\n")
    report.extend(listing)
    return "".join(report)


if __name__ == "__main__":
    lines = linedict(readdepcsv('./deps.csv'))
    with open('./deps-noschultenhof.txt', 'w', encoding='utf-8') as f:
        f.write(linedictstr(lines))