import argparse
import typing as t
from dataclasses import dataclass
from enum import Enum
[docs]
class QueryFilter(str, Enum):
    """
    Criteria by which generated queries can be filtered.

    Members mix in ``str``, so each one compares equal to its plain string
    value and can be built from that value, e.g. ``QueryFilter('empty')``.
    """

    # Filters on whether a query's result set is empty.
    NON_EMPTY = 'non-empty'
    EMPTY = 'empty'

    # Filters on whether a query produced an error.
    HAS_ERROR = 'has-error'
    WITHOUT_ERROR = 'without-error'
[docs]
@dataclass
class Arguments:
    """
    A wrapper class providing concrete types for parsed command-line arguments.

    Every field mirrors one command-line option: the option ``--some-name``
    is stored in the field ``some_name``. Use :meth:`from_args` to build an
    instance from a command line.
    """

    # Generate and execute queries consecutively.
    disable_multi_processing: bool
    # Require generated queries to return a non-empty result on sample data.
    ensure_non_empty: bool
    # Criterion to filter generated queries by; None when --filter is omitted.
    filter: t.Optional[QueryFilter]
    # Probability of including groupby aggregation operations.
    groupby_aggregation_probability: float
    # Maximum number of columns in group by operations.
    max_groupby_columns: int
    # Maximum number of table merges allowed.
    max_merges: int
    # Maximum number of columns to project.
    max_projection_columns: int
    # Maximum number of conditions in selection operations.
    max_selection_conditions: int
    # Format queries on multiple lines.
    multi_line: bool
    # Number of queries to generate (required on the command line).
    num_queries: int
    # Name of the file to write results to; None when --output-file is omitted.
    output_file: t.Optional[str]
    # Probability of including projection operations.
    projection_probability: float
    # Path to the relational schema JSON file (required on the command line).
    schema: str
    # Probability of including selection operations.
    selection_probability: float
    # Sort the queries by complexity.
    sort: bool
    # Print extra generation information and statistics.
    verbose: bool

    @staticmethod
    def _build_parser() -> argparse.ArgumentParser:
        """Create the argument parser with one option per dataclass field."""
        parser = argparse.ArgumentParser(
            description='Pandas Query Generator CLI',
            formatter_class=HelpFormatter,
        )

        parser.add_argument(
            '--disable-multi-processing',
            action='store_true',
            help='Generate and execute queries in a consecutive fashion',
        )
        parser.add_argument(
            '--ensure-non-empty',
            action='store_true',
            help='Ensure generated queries return a non-empty result set when executed on sample data',
        )
        parser.add_argument(
            '--filter',
            type=QueryFilter,
            # Offer the plain string values rather than the enum members:
            # argparse renders choices with str(), which for a (str, Enum)
            # member is 'QueryFilter.NON_EMPTY' instead of the 'non-empty'
            # the user has to type. The membership check still succeeds
            # because the str mixin makes members equal to their values.
            choices=[option.value for option in QueryFilter],
            default=None,
            help='Filter generated queries by specific criteria',
        )
        parser.add_argument(
            '--groupby-aggregation-probability',
            type=float,
            default=0.5,
            help='Probability of including groupby aggregation operations',
        )
        parser.add_argument(
            '--max-groupby-columns',
            type=int,
            default=5,
            help='Maximum number of columns in group by operations',
        )
        parser.add_argument(
            '--max-merges',
            type=int,
            default=2,
            help='Maximum number of table merges allowed',
        )
        parser.add_argument(
            '--max-projection-columns',
            type=int,
            default=5,
            help='Maximum number of columns to project',
        )
        parser.add_argument(
            '--max-selection-conditions',
            type=int,
            default=5,
            help='Maximum number of conditions in selection operations',
        )
        parser.add_argument(
            '--multi-line',
            action='store_true',
            help='Format queries on multiple lines',
        )
        parser.add_argument(
            '--num-queries',
            type=int,
            required=True,
            help='The number of queries to generate',
        )
        parser.add_argument(
            '--output-file',
            type=str,
            help='The name of the file to write the results to',
        )
        parser.add_argument(
            '--projection-probability',
            type=float,
            default=0.5,
            help='Probability of including projection operations',
        )
        parser.add_argument(
            '--schema',
            type=str,
            required=True,
            help='Path to the relational schema JSON file',
        )
        parser.add_argument(
            '--selection-probability',
            type=float,
            default=0.5,
            help='Probability of including selection operations',
        )
        parser.add_argument(
            '--sort',
            action='store_true',
            help='Whether or not to sort the queries by complexity',
        )
        parser.add_argument(
            '--verbose',
            action='store_true',
            help='Print extra generation information and statistics',
        )

        return parser

    @classmethod
    def from_args(cls, argv: t.Optional[t.Sequence[str]] = None) -> 'Arguments':
        """
        Parse command-line arguments into an ``Arguments`` instance.

        Args:
            argv: Argument strings to parse. When None (the default),
                argparse reads them from ``sys.argv[1:]``.

        Returns:
            An instance populated from the parsed options.

        Raises:
            SystemExit: Raised by argparse when the arguments are invalid,
                a required option is missing, or ``--help`` is requested.
        """
        namespace = cls._build_parser().parse_args(argv)

        # The option destinations match the dataclass field names exactly,
        # so the parsed namespace can be splatted into the constructor.
        return cls(**vars(namespace))