Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions cmds/extract_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,25 @@ func (cmd *ExtractCommand) AddFlags(fl *pflag.FlagSet) {
fl.StringArrayVar(&cmd.extensionDefs, "ext", nil, "Include GTFS Extension")
fl.IntVar(&cmd.fvid, "fvid", 0, "Specify FeedVersionID when writing to a database")
fl.BoolVar(&cmd.create, "create", false, "Create a basic database schema if none exists")
fl.BoolVar(&cmd.writeExtraColumns, "write-extra-columns", false, "Include extra columns in output")

// Copy options
fl.Float64Var(&cmd.SimplifyShapes, "simplify-shapes", 0.0, "Simplify shapes with this tolerance (ex. 0.000005)")
fl.BoolVar(&cmd.AllowEntityErrors, "allow-entity-errors", false, "Allow entities with errors to be copied")
fl.IntVar(&cmd.Options.ErrorLimit, "error-limit", 10, "Max number of detailed errors per error group")
fl.BoolVar(&cmd.AllowReferenceErrors, "allow-reference-errors", false, "Allow entities with reference errors to be copied")
fl.BoolVar(&cmd.InterpolateStopTimes, "interpolate-stop-times", false, "Interpolate missing StopTime arrival/departure values")
fl.BoolVar(&cmd.CreateMissingShapes, "create-missing-shapes", false, "Create missing Shapes from Trip stop-to-stop geometries")
fl.BoolVar(&cmd.NormalizeServiceIDs, "normalize-service-ids", false, "Create any missing Calendar entities for CalendarDate service_id's")
fl.BoolVar(&cmd.Options.AllowReferenceErrors, "allow-reference-errors", false, "Allow entities with reference errors to be copied")
fl.BoolVar(&cmd.Options.InterpolateStopTimes, "interpolate-stop-times", false, "Interpolate missing StopTime arrival/departure values")
fl.BoolVar(&cmd.Options.CreateMissingShapes, "create-missing-shapes", false, "Create missing Shapes from Trip stop-to-stop geometries")
fl.BoolVar(&cmd.Options.NormalizeServiceIDs, "normalize-service-ids", false, "Create any missing Calendar entities for CalendarDate service_id's")
fl.BoolVar(&cmd.Options.DeduplicateJourneyPatterns, "deduplicate-stop-times", false, "Deduplicate StopTimes using Journey Patterns")
fl.BoolVar(&cmd.SimplifyCalendars, "simplify-calendars", false, "Attempt to simplify CalendarDates into regular Calendars")
fl.BoolVar(&cmd.Options.NormalizeTimezones, "normalize-timezones", false, "Normalize timezones and apply default stop timezones based on agency and parent stops")
fl.BoolVar(&cmd.UseBasicRouteTypes, "use-basic-route-types", false, "Collapse extended route_type's into basic GTFS values")
fl.BoolVar(&cmd.CopyExtraFiles, "write-extra-files", false, "Copy additional files found in source to destination")
fl.BoolVar(&cmd.writeExtraColumns, "write-extra-columns", false, "Include extra columns in output")
fl.BoolVar(&cmd.Options.SimplifyCalendars, "simplify-calendars", false, "Attempt to simplify CalendarDates into regular Calendars")
fl.BoolVar(&cmd.Options.CopyExtraFiles, "write-extra-files", false, "Copy additional files found in source to destination")

// Common extension options
fl.Float64Var(&cmd.Options.CommonExtensions.SimplifyShapes, "simplify-shapes", 0.0, "Simplify shapes with this tolerance (ex. 0.000005)")
fl.BoolVar(&cmd.Options.CommonExtensions.NormalizeTimezones, "normalize-timezones", false, "Normalize timezones and apply default stop timezones based on agency and parent stops")
fl.BoolVar(&cmd.Options.CommonExtensions.UseBasicRouteTypes, "use-basic-route-types", false, "Collapse extended route_type's into basic GTFS values")
fl.Float64Var(&cmd.Options.CommonExtensions.ShapeMaxSegmentLength, "shape-max-segment-length", 0.0, "Maximum shape segment length in meters (0.0 to disable check)")
fl.BoolVar(&cmd.Options.CommonExtensions.NullIslandCheck, "null-island-check", false, "Check for Null Island in shapes.txt and stops.txt")

// Extract options
fl.StringArrayVar(&cmd.extractAgencies, "extract-agency", nil, "Extract Agency")
Expand Down
10 changes: 8 additions & 2 deletions cmds/import_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,19 @@ func (cmd *ImportCommand) AddFlags(fl *pflag.FlagSet) {
fl.BoolVar(&cmd.Latest, "latest", false, "Only import latest feed version available for each feed")
fl.BoolVar(&cmd.DryRun, "dryrun", false, "Dry run; print feeds that would be imported and exit")
fl.BoolVar(&cmd.Options.Activate, "activate", false, "Set as active feed version after import")

// Copy options
fl.Float64Var(&cmd.Options.SimplifyShapes, "simplify-shapes", 0.0, "Simplify shapes with this tolerance (ex. 0.000005)")
fl.BoolVar(&cmd.Options.InterpolateStopTimes, "interpolate-stop-times", false, "Interpolate missing StopTime arrival/departure values")
fl.BoolVar(&cmd.Options.DeduplicateJourneyPatterns, "deduplicate-stop-times", false, "Deduplicate StopTimes using Journey Patterns")
fl.BoolVar(&cmd.Options.CreateMissingShapes, "create-missing-shapes", false, "Create missing Shapes from Trip stop-to-stop geometries")
fl.BoolVar(&cmd.Options.SimplifyCalendars, "simplify-calendars", false, "Attempt to simplify CalendarDates into regular Calendars")
fl.BoolVar(&cmd.Options.NormalizeTimezones, "normalize-timezones", false, "Normalize timezones and apply default stop timezones based on agency and parent stops")

// Common extension options
fl.Float64Var(&cmd.Options.CommonExtensions.SimplifyShapes, "simplify-shapes", 0.0, "Simplify shapes with this tolerance (ex. 0.000005)")
fl.BoolVar(&cmd.Options.CommonExtensions.NormalizeTimezones, "normalize-timezones", false, "Normalize timezones and apply default stop timezones based on agency and parent stops")
fl.BoolVar(&cmd.Options.CommonExtensions.UseBasicRouteTypes, "use-basic-route-types", false, "Collapse extended route_type's into basic GTFS values")
fl.Float64Var(&cmd.Options.CommonExtensions.ShapeMaxSegmentLength, "shape-max-segment-length", 0.0, "Maximum shape segment length in meters (0.0 to disable check)")
fl.BoolVar(&cmd.Options.CommonExtensions.NullIslandCheck, "null-island-check", false, "Check for Null Island in shapes.txt and stops.txt")
}

// Parse command line flags
Expand Down
132 changes: 9 additions & 123 deletions copier/copier.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@ import (
"github.com/interline-io/log"
"github.com/interline-io/transitland-lib/adapters"
"github.com/interline-io/transitland-lib/causes"
"github.com/interline-io/transitland-lib/ext"
"github.com/interline-io/transitland-lib/filters"
"github.com/interline-io/transitland-lib/gtfs"
"github.com/interline-io/transitland-lib/internal/geomcache"
"github.com/interline-io/transitland-lib/rules"
"github.com/interline-io/transitland-lib/service"
"github.com/interline-io/transitland-lib/tlcsv"
"github.com/interline-io/transitland-lib/tlxy"
Expand Down Expand Up @@ -95,80 +93,6 @@ type hasLine interface {
////////// Copier //////////
////////////////////////////

// Options defines the settable options for a Copier.
//
// Several boolean/float fields below decide which default extensions NewCopier
// registers; the remaining fields are consumed elsewhere in the copier.
type Options struct {
// Batch size
// NOTE(review): unit of the batch (entities per write?) is not visible here — confirm.
BatchSize int
// Skip most validation filters. When false, NewCopier registers the
// minimal set of rules checks (duplicate ID/key, farezone, etc.).
NoValidators bool
// Skip shape cache
NoShapeCache bool
// Attempt to save an entity that returns validation errors
AllowEntityErrors bool
// Allow entities with reference errors to be copied
AllowReferenceErrors bool
// Interpolate any missing StopTime values: ArrivalTime/DepartureTime/ShapeDistTraveled
InterpolateStopTimes bool
// Create a stop-to-stop Shape for Trips without a ShapeID.
CreateMissingShapes bool
// Create missing Calendar entries
NormalizeServiceIDs bool
// Normalize timezones, e.g. US/Pacific -> America/Los_Angeles.
// Registers both NormalizeTimezoneFilter and ApplyParentTimezoneFilter.
NormalizeTimezones bool
// Simplify Calendars that use mostly CalendarDates.
// Only takes effect when NormalizeServiceIDs is also set.
SimplifyCalendars bool
// Convert extended route types to primitives
UseBasicRouteTypes bool
// Copy extra files (requires CSV input)
CopyExtraFiles bool
// Simplify shapes; the filter is registered only when this value is > 0
// and the value is passed through as the simplification tolerance.
SimplifyShapes float64
// Convert route network_id to networks.txt/route_networks.txt.
// When false, RouteNetworkIDCompatFilter is registered instead.
NormalizeNetworks bool
// Deduplicate StopTimes using Journey Patterns
DeduplicateJourneyPatterns bool
// Error limit: max number of detailed errors per error group
ErrorLimit int
// Logging level
// NOTE(review): presumably suppresses log output when true — confirm against usage.
Quiet bool
// Default error handler
ErrorHandler ErrorHandler
// Entity selection strategy
Marker Marker
// Journey Pattern Key Function: derives a string key from a Trip.
JourneyPatternKey func(*gtfs.Trip) string
// Named extensions; each entry is parsed with ParseExtensionDef in NewCopier.
ExtensionDefs []string
// Initialized extensions, appended by AddExtension / AddExtensionWithLevel.
exts []optionExtLevel
}

// optionExtLevel pairs an extension value with the integer level it was
// registered with.
type optionExtLevel struct {
// ext is the extension value; it is stored untyped.
ext any
// level is the value passed to AddExtensionWithLevel (AddExtension passes 0).
// NOTE(review): how the copier interprets the level is not visible here.
level int
}

// AddExtension registers an extension at level 0.
//
// It is shorthand for AddExtensionWithLevel(e, 0). The parameter is named e
// (matching AddExtensionWithLevel) so that it does not shadow the imported
// ext package inside the method body.
func (opts *Options) AddExtension(e any) {
	opts.AddExtensionWithLevel(e, 0)
}

// ParseExtensionDef resolves an extension definition string into an
// extension instance.
//
// extDef is split into a name and arguments by ext.ParseExtensionArgs, and the
// extension is then obtained from ext.GetExtension. An error is returned when
// the definition cannot be parsed, when GetExtension fails, or when
// GetExtension returns a nil extension (nothing registered under that name).
func (opts *Options) ParseExtensionDef(extDef string) (ext.Extension, error) {
	extName, extArgs, err := ext.ParseExtensionArgs(extDef)
	if err != nil {
		return nil, err
	}
	e, err := ext.GetExtension(extName, extArgs)
	if err != nil {
		// Wrap with %w (same message text as before) so callers can still
		// inspect the underlying cause with errors.Is / errors.As.
		return nil, fmt.Errorf("error creating extension '%s' with args '%s': %w", extName, extArgs, err)
	}
	if e == nil {
		return nil, fmt.Errorf("no registered extension for '%s'", extName)
	}
	return e, nil
}

// AddExtensionWithLevel records e together with the supplied level in the
// list of initialized extensions held by these options.
func (opts *Options) AddExtensionWithLevel(e any, level int) {
	entry := optionExtLevel{
		ext:   e,
		level: level,
	}
	opts.exts = append(opts.exts, entry)
}

////////////////////////////////////
// Copier
////////////////////////////////////
Expand Down Expand Up @@ -275,62 +199,24 @@ func NewCopier(ctx context.Context, reader adapters.Reader, writer adapters.Writ
}

// Default set of validators
var addExts []any
addExts = append(addExts, copier.geomCache)

// Minimal validators
if !opts.NoValidators {
addExts = append(addExts,
&rules.EntityDuplicateIDCheck{},
&rules.EntityDuplicateKeyCheck{},
&rules.ValidFarezoneCheck{},
&rules.AgencyIDConditionallyRequiredCheck{},
&rules.StopTimeSequenceCheck{},
&rules.InconsistentTimezoneCheck{},
&rules.ParentStationLocationTypeCheck{},
&rules.CalendarDuplicateDates{},
&rules.FareProductRiderCategoryDefaultCheck{},
&rules.TransferStopLocationTypeCheck{},
)
}

// Default extensions
if copier.options.UseBasicRouteTypes {
// Convert extended route types to basic route types
addExts = append(addExts, &filters.BasicRouteTypeFilter{})
}
if copier.options.NormalizeTimezones {
// Normalize timezones and apply agency/stop timezones where empty
addExts = append(addExts, &filters.NormalizeTimezoneFilter{})
addExts = append(addExts, &filters.ApplyParentTimezoneFilter{})
}
if copier.options.SimplifyShapes > 0 {
// Simplify shapes.txt
addExts = append(addExts, &filters.SimplifyShapeFilter{SimplifyValue: copier.options.SimplifyShapes})
}
if copier.options.NormalizeNetworks {
// Convert routes.txt network_id to networks.txt/route_networks.txt
addExts = append(addExts, &filters.RouteNetworkIDFilter{})
} else {
addExts = append(addExts, &filters.RouteNetworkIDCompatFilter{})
}
if copier.options.SimplifyCalendars && copier.options.NormalizeServiceIDs {
// Simplify calendar and calendar dates
addExts = append(addExts, &filters.SimplifyCalendarFilter{})
}

// Set default extension level to 0
commonExts := copier.options.CommonExtensions.Extensions()
var addExtLevels []optionExtLevel
for _, e := range addExts {
addExtLevels = append(addExtLevels, optionExtLevel{ext: copier.geomCache, level: 0})
for _, e := range commonExts {
addExtLevels = append(addExtLevels, optionExtLevel{ext: e, level: 0})
}

// Simplify calendar and calendar dates
if copier.options.SimplifyCalendars && copier.options.NormalizeServiceIDs {
addExtLevels = append(addExtLevels, optionExtLevel{ext: &filters.SimplifyCalendarFilter{}, level: 0})
}

// Add Option extensions
addExtLevels = append(addExtLevels, opts.exts...)

// Parse option extension defs
for _, extDef := range opts.ExtensionDefs {
e, err := opts.ParseExtensionDef(extDef)
e, err := ParseExtensionDef(extDef)
if err != nil {
return nil, fmt.Errorf("failed to parse extension: %s", err.Error())
}
Expand Down
152 changes: 152 additions & 0 deletions copier/opts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package copier

import (
"fmt"

"github.com/interline-io/transitland-lib/ext"
"github.com/interline-io/transitland-lib/filters"
"github.com/interline-io/transitland-lib/gtfs"
"github.com/interline-io/transitland-lib/rules"
)

// CommonExtensions holds the switches that decide which shared rule checks
// and filters are returned by its Extensions method.
type CommonExtensions struct {
// Skip most validation filters. When false, Extensions includes the
// minimal set of rules checks (duplicate ID/key, farezone, etc.).
NoValidators bool
// Normalize timezones, e.g. US/Pacific -> America/Los_Angeles.
// Adds both NormalizeTimezoneFilter and ApplyParentTimezoneFilter.
NormalizeTimezones bool
// Convert extended route types to primitives
UseBasicRouteTypes bool
// Simplify shapes; the filter is added only when this value is > 0 and the
// value is passed through as the filter's SimplifyValue.
SimplifyShapes float64
// Convert route network_id to networks.txt/route_networks.txt.
// When false, RouteNetworkIDCompatFilter is added instead.
NormalizeNetworks bool
// Maximum shape segment length in meters; the check is added only when
// this value is > 0.
ShapeMaxSegmentLength float64
// Exclude stops and shapes with one or both zero coordinates
NullIslandCheck bool
}

// Extensions assembles the rule checks and filters selected by these options.
//
// The returned slice is ordered: baseline validators first (unless
// NoValidators is set), then the opt-in quality rules, then the opt-in
// transformation filters. Exactly one route network_id filter is always
// appended last: RouteNetworkIDFilter when NormalizeNetworks is set,
// RouteNetworkIDCompatFilter otherwise.
func (opts *CommonExtensions) Extensions() []any {
	var selected []any

	// Baseline validators, included unless explicitly disabled.
	if !opts.NoValidators {
		selected = append(selected,
			&rules.EntityDuplicateIDCheck{},
			&rules.EntityDuplicateKeyCheck{},
			&rules.ValidFarezoneCheck{},
			&rules.AgencyIDConditionallyRequiredCheck{},
			&rules.StopTimeSequenceCheck{},
			&rules.InconsistentTimezoneCheck{},
			&rules.ParentStationLocationTypeCheck{},
			&rules.CalendarDuplicateDates{},
			&rules.FareProductRiderCategoryDefaultCheck{},
			&rules.TransferStopLocationTypeCheck{},
		)
	}

	// Opt-in best-practice rules; these can have a significant impact on
	// which data is accepted, so they are off by default.
	if maxLength := opts.ShapeMaxSegmentLength; maxLength > 0 {
		selected = append(selected, &rules.ShapeMaxSegmentLengthCheck{MaxAllowedDistance: maxLength})
	}
	if opts.NullIslandCheck {
		selected = append(selected, &rules.NullIslandCheck{})
	}

	// Opt-in filters for common data transformations.
	if opts.UseBasicRouteTypes {
		// Extended route types -> basic route types.
		selected = append(selected, &filters.BasicRouteTypeFilter{})
	}
	if opts.NormalizeTimezones {
		// Normalize first, then fill in agency/stop timezones where empty.
		selected = append(selected,
			&filters.NormalizeTimezoneFilter{},
			&filters.ApplyParentTimezoneFilter{},
		)
	}
	if tolerance := opts.SimplifyShapes; tolerance > 0 {
		// Simplify shapes.txt geometries.
		selected = append(selected, &filters.SimplifyShapeFilter{SimplifyValue: tolerance})
	}

	// Route network_id handling: one of the two filters is always present.
	switch {
	case opts.NormalizeNetworks:
		// routes.txt network_id -> networks.txt/route_networks.txt
		selected = append(selected, &filters.RouteNetworkIDFilter{})
	default:
		selected = append(selected, &filters.RouteNetworkIDCompatFilter{})
	}

	return selected
}

// Options defines the settable options for a Copier.
//
// The embedded CommonExtensions carries the switches for the shared
// validators and filters; the remaining fields are copier-specific.
type Options struct {
// Batch size
// NOTE(review): unit of the batch (entities per write?) is not visible here — confirm.
BatchSize int
// Skip shape cache
NoShapeCache bool
// Attempt to save an entity that returns validation errors
AllowEntityErrors bool
// Allow entities with reference errors to be copied
AllowReferenceErrors bool
// Interpolate any missing StopTime values: ArrivalTime/DepartureTime/ShapeDistTraveled
InterpolateStopTimes bool
// Create a stop-to-stop Shape for Trips without a ShapeID.
CreateMissingShapes bool
// Create missing Calendar entries
NormalizeServiceIDs bool
// Simplify Calendars that use mostly CalendarDates.
// NewCopier only adds the SimplifyCalendarFilter when NormalizeServiceIDs
// is also set.
SimplifyCalendars bool
// Copy extra files (requires CSV input)
CopyExtraFiles bool
// Deduplicate StopTimes using Journey Patterns
DeduplicateJourneyPatterns bool
// Error limit: max number of detailed errors per error group
ErrorLimit int
// Logging level
// NOTE(review): presumably suppresses log output when true — confirm against usage.
Quiet bool
// Default error handler
ErrorHandler ErrorHandler
// Entity selection strategy
Marker Marker
// Journey Pattern Key Function: derives a string key from a Trip.
JourneyPatternKey func(*gtfs.Trip) string
// Named extensions; each entry is parsed with ParseExtensionDef in NewCopier.
ExtensionDefs []string
// Common extensions (embedded, so its fields and Extensions method are
// promoted onto Options).
CommonExtensions
// Initialized extensions, appended by AddExtension / AddExtensionWithLevel.
exts []optionExtLevel
}

// optionExtLevel pairs an extension value with the integer level it was
// registered with.
type optionExtLevel struct {
// ext is the extension value; it is stored untyped.
ext any
// level is the value passed to AddExtensionWithLevel (AddExtension passes 0).
// NOTE(review): how the copier interprets the level is not visible here.
level int
}

// AddExtension registers an extension at level 0.
//
// It is shorthand for AddExtensionWithLevel(e, 0). The parameter is named e
// (matching AddExtensionWithLevel) so that it does not shadow the imported
// ext package inside the method body.
func (opts *Options) AddExtension(e any) {
	opts.AddExtensionWithLevel(e, 0)
}

// ParseExtensionDef resolves an extension definition string into an
// extension instance.
//
// extDef is split into a name and arguments by ext.ParseExtensionArgs, and the
// extension is then obtained from ext.GetExtension. An error is returned when
// the definition cannot be parsed, when GetExtension fails, or when
// GetExtension returns a nil extension (nothing registered under that name).
func ParseExtensionDef(extDef string) (ext.Extension, error) {
	extName, extArgs, err := ext.ParseExtensionArgs(extDef)
	if err != nil {
		return nil, err
	}
	e, err := ext.GetExtension(extName, extArgs)
	if err != nil {
		// Wrap with %w (same message text as before) so callers can still
		// inspect the underlying cause with errors.Is / errors.As.
		return nil, fmt.Errorf("error creating extension '%s' with args '%s': %w", extName, extArgs, err)
	}
	if e == nil {
		return nil, fmt.Errorf("no registered extension for '%s'", extName)
	}
	return e, nil
}

// AddExtensionWithLevel records e together with the supplied level in the
// list of initialized extensions held by these options.
func (opts *Options) AddExtensionWithLevel(e any, level int) {
	entry := optionExtLevel{
		ext:   e,
		level: level,
	}
	opts.exts = append(opts.exts, entry)
}
Loading
Loading