<span style="color: #0000ff">import</span><span style="color: #000000"> os
</span><span style="color: #0000ff">import</span><span style="color: #000000"> re
</span><span style="color: #0000ff">import</span><span style="color: #000000"> sys
</span><span style="color: #0000ff">import</span><span style="color: #000000"> csv
</span><span style="color: #0000ff">import</span><span style="color: #000000"> time
</span><span style="color: #0000ff">import</span><span style="color: #000000"> argparse
</span><span style="color: #0000ff">import</span><span style="color: #000000"> collections
</span><span style="color: #0000ff">import</span><span style="color: #000000"> MySQLdb
</span><span style="color: #0000ff">import</span><span style="color: #000000"> warnings
</span><span style="color: #008000">#</span><span style="color: #008000"> suppress annoying mysql warnings</span>
warnings.filterwarnings(action=<span style="color: #800000">'</span><span style="color: #800000">ignore</span><span style="color: #800000">'</span>, category=<span style="color: #000000">MySQLdb.Warning)
</span><span style="color: #0000ff">def</span><span style="color: #000000"> get_type(s):
</span><span style="color: #800000">"""</span><span style="color: #800000">Find type for this string
</span><span style="color: #800000">"""</span>
<span style="color: #008000">#</span><span style="color: #008000"> try integer type</span>
<span style="color: #0000ff">try</span><span style="color: #000000">:
v </span>=<span style="color: #000000"> int(s)
</span><span style="color: #0000ff">except</span><span style="color: #000000"> ValueError:
</span><span style="color: #0000ff">pass</span>
<span style="color: #0000ff">else</span><span style="color: #000000">:
</span><span style="color: #0000ff">if</span> abs(v) > 2147483647<span style="color: #000000">:
</span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">bigint</span><span style="color: #800000">'</span>
<span style="color: #0000ff">else</span><span style="color: #000000">:
</span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">int</span><span style="color: #800000">'</span>
<span style="color: #008000">#</span><span style="color: #008000"> try float type</span>
<span style="color: #0000ff">try</span><span style="color: #000000">:
float(s)
</span><span style="color: #0000ff">except</span><span style="color: #000000"> ValueError:
</span><span style="color: #0000ff">pass</span>
<span style="color: #0000ff">else</span><span style="color: #000000">:
</span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">double</span><span style="color: #800000">'</span>
<span style="color: #008000">#</span><span style="color: #008000"> check for timestamp</span>
dt_formats =<span style="color: #000000"> (
(</span><span style="color: #800000">'</span><span style="color: #800000">%Y-%m-%d %H:%M:%S</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">datetime</span><span style="color: #800000">'</span><span style="color: #000000">),
(</span><span style="color: #800000">'</span><span style="color: #800000">%Y-%m-%d %H:%M:%S.%f</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">datetime</span><span style="color: #800000">'</span><span style="color: #000000">),
(</span><span style="color: #800000">'</span><span style="color: #800000">%Y-%m-%d</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">date</span><span style="color: #800000">'</span><span style="color: #000000">),
(</span><span style="color: #800000">'</span><span style="color: #800000">%H:%M:%S</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">time</span><span style="color: #800000">'</span><span style="color: #000000">),
)
</span><span style="color: #0000ff">for</span> dt_format, dt_type <span style="color: #0000ff">in</span><span style="color: #000000"> dt_formats:
</span><span style="color: #0000ff">try</span><span style="color: #000000">:
time.strptime(s, dt_format)
</span><span style="color: #0000ff">except</span><span style="color: #000000"> ValueError:
</span><span style="color: #0000ff">pass</span>
<span style="color: #0000ff">else</span><span style="color: #000000">:
</span><span style="color: #0000ff">return</span><span style="color: #000000"> dt_type
</span><span style="color: #008000">#</span><span style="color: #008000"> doesn't match any other types so assume text</span>
<span style="color: #0000ff">if</span> len(s) > 255<span style="color: #000000">:
</span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">text</span><span style="color: #800000">'</span>
<span style="color: #0000ff">else</span><span style="color: #000000">:
</span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">varchar(255)</span><span style="color: #800000">'</span>
<span style="color: #0000ff">def</span> most_common(l, default=<span style="color: #800000">'</span><span style="color: #800000">varchar(255)</span><span style="color: #800000">'</span><span style="color: #000000">):
</span><span style="color: #800000">"""</span><span style="color: #800000">Return most common value from list
</span><span style="color: #800000">"""</span>
<span style="color: #008000">#</span><span style="color: #008000"> some formats trump others</span>
<span style="color: #0000ff">if</span><span style="color: #000000"> l:
</span><span style="color: #0000ff">for</span> dt_type <span style="color: #0000ff">in</span> (<span style="color: #800000">'</span><span style="color: #800000">text</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">bigint</span><span style="color: #800000">'</span><span style="color: #000000">):
</span><span style="color: #0000ff">if</span> dt_type <span style="color: #0000ff">in</span><span style="color: #000000"> l:
</span><span style="color: #0000ff">return</span><span style="color: #000000"> dt_type
</span><span style="color: #0000ff">return</span> max(l, key=<span style="color: #000000">l.count)
</span><span style="color: #0000ff">return</span><span style="color: #000000"> default
</span><span style="color: #0000ff">def</span> get_col_types(input_file, max_rows=1000<span style="color: #000000">):
</span><span style="color: #800000">"""</span><span style="color: #800000">Find the type for each CSV column
</span><span style="color: #800000">"""</span><span style="color: #000000">
csv_types </span>=<span style="color: #000000"> collections.defaultdict(list)
reader </span>=<span style="color: #000000"> csv.reader(open(input_file))
</span><span style="color: #008000">#</span><span style="color: #008000"> test the first few rows for their data types</span>
<span style="color: #0000ff">for</span> row_i, row <span style="color: #0000ff">in</span><span style="color: #000000"> enumerate(reader):
</span><span style="color: #0000ff">if</span> row_i ==<span style="color: #000000"> 0:
header </span>=<span style="color: #000000"> row
</span><span style="color: #0000ff">else</span><span style="color: #000000">:
</span><span style="color: #0000ff">for</span> col_i, s <span style="color: #0000ff">in</span><span style="color: #000000"> enumerate(row):
data_type </span>=<span style="color: #000000"> get_type(s)
csv_types[header[col_i]].append(data_type)
</span><span style="color: #0000ff">if</span> row_i ==<span style="color: #000000"> max_rows:
</span><span style="color: #0000ff">break</span>
<span style="color: #008000">#</span><span style="color: #008000"> take the most common data type for each row</span>
<span style="color: #0000ff">return</span> [most_common(csv_types[col]) <span style="color: #0000ff">for</span> col <span style="color: #0000ff">in</span><span style="color: #000000"> header]
</span><span style="color: #0000ff">def</span><span style="color: #000000"> get_schema(table, header, col_types):
</span><span style="color: #800000">"""</span><span style="color: #800000">Generate the schema for this table from given types and columns
</span><span style="color: #800000">"""</span><span style="color: #000000">
schema_sql </span>= <span style="color: #800000">"""</span><span style="color: #800000">CREATE TABLE IF NOT EXISTS %s (
id int NOT NULL AUTO_INCREMENT,</span><span style="color: #800000">"""</span> %<span style="color: #000000"> table
</span><span style="color: #0000ff">for</span> col_name, col_type <span style="color: #0000ff">in</span><span style="color: #000000"> zip(header, col_types):
schema_sql </span>+= <span style="color: #800000">'</span><span style="color: #800000">\n%s %s,</span><span style="color: #800000">'</span> %<span style="color: #000000"> (col_name, col_type)
schema_sql </span>+= <span style="color: #800000">"""</span><span style="color: #800000">\nPRIMARY KEY (id)
) DEFAULT CHARSET=utf8;</span><span style="color: #800000">"""</span>
<span style="color: #0000ff">return</span><span style="color: #000000"> schema_sql
</span><span style="color: #0000ff">def</span><span style="color: #000000"> get_insert(table, header):
</span><span style="color: #800000">"""</span><span style="color: #800000">Generate the SQL for inserting rows
</span><span style="color: #800000">"""</span><span style="color: #000000">
field_names </span>= <span style="color: #800000">'</span><span style="color: #800000">, </span><span style="color: #800000">'</span><span style="color: #000000">.join(header)
field_markers </span>= <span style="color: #800000">'</span><span style="color: #800000">, </span><span style="color: #800000">'</span>.join(<span style="color: #800000">'</span><span style="color: #800000">%s</span><span style="color: #800000">'</span> <span style="color: #0000ff">for</span> col <span style="color: #0000ff">in</span><span style="color: #000000"> header)
</span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">INSERT INTO %s (%s) VALUES (%s);</span><span style="color: #800000">'</span> %<span style="color: #000000"> \
(table, field_names, field_markers)
</span><span style="color: #0000ff">def</span><span style="color: #000000"> format_header(row):
</span><span style="color: #800000">"""</span><span style="color: #800000">Format column names to remove illegal characters and duplicates
</span><span style="color: #800000">"""</span><span style="color: #000000">
safe_col </span>= <span style="color: #0000ff">lambda</span> s: re.sub(<span style="color: #800000">'</span><span style="color: #800000">\W+</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">_</span><span style="color: #800000">'</span>, s.lower()).strip(<span style="color: #800000">'</span><span style="color: #800000">_</span><span style="color: #800000">'</span><span style="color: #000000">)
header </span>=<span style="color: #000000"> []
counts </span>=<span style="color: #000000"> collections.defaultdict(int)
</span><span style="color: #0000ff">for</span> col <span style="color: #0000ff">in</span><span style="color: #000000"> row:
col </span>=<span style="color: #000000"> safe_col(col)
counts[col] </span>+= 1
<span style="color: #0000ff">if</span> counts[col] > 1<span style="color: #000000">:
col </span>= <span style="color: #800000">'</span><span style="color: #800000">{}{}</span><span style="color: #800000">'</span><span style="color: #000000">.format(col, counts[col])
header.append(col)
</span><span style="color: #0000ff">return</span><span style="color: #000000"> header
</span><span style="color: #0000ff">def</span> main(input_file, user, password, host, table, database, max_inserts=10000<span style="color: #000000">):
</span><span style="color: #0000ff">print</span> <span style="color: #800000">"</span><span style="color: #800000">Importing `%s' into MySQL database `%s.%s'</span><span style="color: #800000">"</span> %<span style="color: #000000"> (input_file, database, table)
db </span>= MySQLdb.connect(host=host, user=user, passwd=password, charset=<span style="color: #800000">'</span><span style="color: #800000">utf8</span><span style="color: #800000">'</span><span style="color: #000000">)
cursor </span>=<span style="color: #000000"> db.cursor()
</span><span style="color: #008000">#</span><span style="color: #008000"> create database and if doesn't exist</span>
cursor.execute(<span style="color: #800000">'</span><span style="color: #800000">CREATE DATABASE IF NOT EXISTS %s;</span><span style="color: #800000">'</span> %<span style="color: #000000"> database)
db.select_db(database)
</span><span style="color: #008000">#</span><span style="color: #008000"> define table</span>
<span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Analyzing column types ...</span><span style="color: #800000">'</span><span style="color: #000000">
col_types </span>=<span style="color: #000000"> get_col_types(input_file)
</span><span style="color: #0000ff">print</span><span style="color: #000000"> col_types
header </span>=<span style="color: #000000"> None
</span><span style="color: #0000ff">for</span> i, row <span style="color: #0000ff">in</span><span style="color: #000000"> enumerate(csv.reader(open(input_file))):
</span><span style="color: #0000ff">if</span><span style="color: #000000"> header:
</span><span style="color: #0000ff">while</span> len(row) <<span style="color: #000000"> len(header):
row.append(</span><span style="color: #800000">''</span>) <span style="color: #008000">#</span><span style="color: #008000"> this row is missing columns so pad blank values</span>
<span style="color: #000000"> cursor.execute(insert_sql, row)
</span><span style="color: #0000ff">if</span> i % max_inserts ==<span style="color: #000000"> 0:
db.commit()
</span><span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">commit</span><span style="color: #800000">'</span>
<span style="color: #0000ff">else</span><span style="color: #000000">:
header </span>=<span style="color: #000000"> format_header(row)
schema_sql </span>=<span style="color: #000000"> get_schema(table, header, col_types)
</span><span style="color: #0000ff">print</span><span style="color: #000000"> schema_sql
</span><span style="color: #008000">#</span><span style="color: #008000"> create table</span>
cursor.execute(<span style="color: #800000">'</span><span style="color: #800000">DROP TABLE IF EXISTS %s;</span><span style="color: #800000">'</span> %<span style="color: #000000"> table)
cursor.execute(schema_sql)
</span><span style="color: #008000">#</span><span style="color: #008000"> create index for more efficient access</span>
<span style="color: #0000ff">try</span><span style="color: #000000">:
cursor.execute(</span><span style="color: #800000">'</span><span style="color: #800000">CREATE INDEX ids ON %s (id);</span><span style="color: #800000">'</span> %<span style="color: #000000"> table)
</span><span style="color: #0000ff">except</span><span style="color: #000000"> MySQLdb.OperationalError:
</span><span style="color: #0000ff">pass</span> <span style="color: #008000">#</span><span style="color: #008000"> index already exists</span>
<span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Inserting rows ...</span><span style="color: #800000">'</span>
<span style="color: #008000">#</span><span style="color: #008000"> SQL string for inserting data</span>
insert_sql =<span style="color: #000000"> get_insert(table, header)
</span><span style="color: #008000">#</span><span style="color: #008000"> commit rows to database</span>
<span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Committing rows to database ...</span><span style="color: #800000">'</span><span style="color: #000000">
db.commit()
</span><span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Done!</span><span style="color: #800000">'</span>
<span style="color: #0000ff">if</span> <span style="color: #800080">__name__</span> == <span style="color: #800000">'</span><span style="color: #800000">__main__</span><span style="color: #800000">'</span><span style="color: #000000">:
parser </span>= argparse.ArgumentParser(description=<span style="color: #800000">'</span><span style="color: #800000">Automatically insert CSV contents into MySQL</span><span style="color: #800000">'</span><span style="color: #000000">)
parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--table</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">table</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">Set the name of the table. If not set the CSV filename will be used</span><span style="color: #800000">'</span><span style="color: #000000">)
parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--database</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">database</span><span style="color: #800000">'</span>, default=<span style="color: #800000">'</span><span style="color: #800000">test</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">Set the name of the database. If not set the test database will be used</span><span style="color: #800000">'</span><span style="color: #000000">)
parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--user</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">user</span><span style="color: #800000">'</span>, default=<span style="color: #800000">'</span><span style="color: #800000">root</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The MySQL login username</span><span style="color: #800000">'</span><span style="color: #000000">)
parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--password</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">password</span><span style="color: #800000">'</span>, default=<span style="color: #800000">''</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The MySQL login password</span><span style="color: #800000">'</span><span style="color: #000000">)
parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--host</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">host</span><span style="color: #800000">'</span>, default=<span style="color: #800000">'</span><span style="color: #800000">localhost</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The MySQL host</span><span style="color: #800000">'</span><span style="color: #000000">)
parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">input_file</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The input CSV file</span><span style="color: #800000">'</span><span style="color: #000000">)
args </span>= parser.parse_args(sys.argv[1<span style="color: #000000">:])
</span><span style="color: #0000ff">if</span> <span style="color: #0000ff">not</span><span style="color: #000000"> args.table:
</span><span style="color: #008000">#</span><span style="color: #008000"> use input file name for table</span>
args.table =<span style="color: #000000"> os.path.splitext(os.path.basename(args.input_file))[0]
main(args.input_file, args.user, args.password, args.host, args.table, args.database)</span>具体使用例子如下
[root@server1]# python csv2mysql.py --host=172.20.197.61 --user=PdYRxGWNpVRCQfHj --password=RX5a5YsViQcDdywr --database=cf_dc61100a_92a7_43ca_81dd_2a7e3fa0808a --table=performance_history_2 performance_history_2.csv
Importing `performance_history_2.csv' into MySQL database `cf_dc61100a_92a7_43ca_81dd_2a7e3fa0808a.performance_history_2'
Analyzing column types ...
['datetime', 'varchar(255)', 'varchar(255)', 'varchar(255)', 'varchar(255)', 'varchar(255)', 'varchar(255)', 'varchar(255)']
CREATE TABLE IF NOT EXISTS performance_history_2 (
id int NOT NULL AUTO_INCREMENT,
date_time datetime,
write_bw_mb_s varchar(255),
read_bw_mb_s varchar(255),
write_iops varchar(255),
read_iops varchar(255),
write_latency_usec varchar(255),
read_latency_usec varchar(255),
avg_latency_usec varchar(255),
PRIMARY KEY (id)
) DEFAULT CHARSET=utf8;
Inserting rows ...
commit
commit
commit
commit
commit
commit
commit
commit
commit
commit
commit
Committing rows to database ...
Done!
以上就是csv2mysql的详细内容,更多请关注php中文网其它相关文章!
每个人都需要一台速度更快、更稳定的 PC。随着时间的推移,垃圾文件、旧注册表数据和不必要的后台进程会占用资源并降低性能。幸运的是,许多工具可以让 Windows 保持平稳运行。
Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号