package main import ( "bufio" "compress/gzip" "fmt" "os" "time" xmlparser "github.com/tamerh/xml-stream-parser" // Dá se získat pomocí "go get -u github.com/tamerh/xml-stream-parser" ) // Funkce na kontrolu jestli má tag elementu danou hodnotu func hasTagValue(el *xmlparser.XMLElement, name string, value string) bool { for _, tag := range el.Childs["tag"] { if tag.Attrs["k"] == name { return tag.Attrs["v"] == value } } return false } func die(exitcode int, format string, a ...interface{}) { fmt.Fprintf(os.Stderr, format, a...) os.Exit(1) } func main() { if len(os.Args) != 2 { die(1, "Usage: %s \n", os.Args[0]) } // 1. Otevřeme soubor file, err := os.Open(os.Args[1]) if err != nil { die(1, "Cannot open file: %v\n", err) } // 2. Gzip streamový reader gzipReader, err := gzip.NewReader(file) if err != nil { die(1, "Cannot construct gzip stream reader: %v\n", err) } // 3. XML streamový parser buffer := bufio.NewReaderSize(gzipReader, 1024*1024*256) parser := xmlparser.NewXMLParser(buffer, "node", "way") parser.SkipElements([]string{"relation"}).SkipOuterElements() nodes, ways, libNodes, libWays := 0, 0, 0, 0 start := time.Now() // funkce na vypisování statistiky printStat := func() { megabytes := float64(parser.TotalReadSize) / 1024 / 1024 elapsed := time.Now().Sub(start) fmt.Fprintf(os.Stderr, "Elapsed: %-20s Read: %10.2fMB (%.2fMB/s)\tLibraries in nodes: %6d/%-10d\tLibraries in ways: %6d/%-10d\r", elapsed, megabytes, megabytes/elapsed.Seconds(), libNodes, nodes, libWays, ways, ) } // 4. Čteme element po elementu for xml := range parser.Stream() { switch xml.Name { case "node": nodes++ if hasTagValue(xml, "amenity", "library") { libNodes++ fmt.Printf("Library: %v\n", xml) } case "way": ways++ if hasTagValue(xml, "amenity", "library") { libWays++ fmt.Printf("Library area: %v\n", xml) } } // Vypíšeme statistiku každých 10000 načtených elementů if (nodes+ways)%10000 == 0 { printStat() } } printStat() fmt.Fprintf(os.Stderr, "\n\nTotal time: %v\n", time.Now().Sub(start)) }