Compare commits
No commits in common. "master" and "gh-pages" have entirely different histories.
|
@ -1,6 +0,0 @@
|
|||
lib
|
||||
target
|
||||
/.settings
|
||||
/.classpath
|
||||
/.project
|
||||
|
202
LICENSE.txt
202
LICENSE.txt
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,94 @@
|
|||
.com { color: #93a1a1; }
|
||||
.lit { color: #195f91; }
|
||||
.pun, .opn, .clo { color: #93a1a1; }
|
||||
.fun { color: #dc322f; }
|
||||
.str, .atv { color: #268bd2; }
|
||||
.kwd, .tag { color: #195f91; }
|
||||
.typ, .atn, .dec, .var { color: #CB4B16; }
|
||||
/* .pln { color: #93a1a1; } */
|
||||
.prettyprint {
|
||||
background-color: #fefbf3;
|
||||
padding: 9px;
|
||||
border: 1px solid rgba(0,0,0,.2);
|
||||
-webkit-box-shadow: 0 1px 2px rgba(0,0,0,.1);
|
||||
-moz-box-shadow: 0 1px 2px rgba(0,0,0,.1);
|
||||
box-shadow: 0 1px 2px rgba(0,0,0,.1);
|
||||
}
|
||||
|
||||
/* Specify class=linenums on a pre to get line numbering */
|
||||
ol.linenums {
|
||||
margin: 0 0 0 40px;
|
||||
}
|
||||
/* IE indents via margin-left */
|
||||
ol.linenums li {
|
||||
padding: 0 5px;
|
||||
color: rgba(0,0,0,.15);
|
||||
line-height: 20px;
|
||||
-webkit-border-radius: 2px;
|
||||
-moz-border-radius: 2px;
|
||||
border-radius: 2px;
|
||||
}
|
||||
/* Alternate shading for lines */
|
||||
li.L1, li.L3, li.L5, li.L7, li.L9 { }
|
||||
|
||||
/*
|
||||
$base03: #002b36;
|
||||
$base02: #073642;
|
||||
$base01: #586e75;
|
||||
$base00: #657b83;
|
||||
$base0: #839496;
|
||||
$base1: #93a1a1;
|
||||
$base2: #eee8d5;
|
||||
$base3: #fdf6e3;
|
||||
$yellow: #b58900;
|
||||
$orange: #cb4b16;
|
||||
$red: #dc322f;
|
||||
$magenta: #d33682;
|
||||
$violet: #6c71c4;
|
||||
$blue: #268bd2;
|
||||
$cyan: #2aa198;
|
||||
$green: #859900;
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
#1d1f21 Background
|
||||
#282a2e Current Line
|
||||
#373b41 Selection
|
||||
#c5c8c6 Foreground
|
||||
#969896 Comment
|
||||
#cc6666 Red
|
||||
#de935f Orange
|
||||
#f0c674 Yellow
|
||||
#b5bd68 Green
|
||||
#8abeb7 Aqua
|
||||
#81a2be Blue
|
||||
#b294bb Purple
|
||||
*/
|
||||
|
||||
|
||||
/* DARK THEME */
|
||||
/* ---------- */
|
||||
|
||||
.prettyprint-dark {
|
||||
background-color: #1d1f21;
|
||||
border: 0;
|
||||
padding: 10px;
|
||||
}
|
||||
.prettyprint-dark .linenums li {
|
||||
color: #444;
|
||||
}
|
||||
.prettyprint-dark .linenums li:hover {
|
||||
background-color: #282a2e;
|
||||
}
|
||||
/* tags in html */
|
||||
.prettyprint-dark .kwd,
|
||||
.prettyprint-dark .tag { color: #cc6666; }
|
||||
/* html attr */
|
||||
.prettyprint-dark .typ,
|
||||
.prettyprint-dark .atn,
|
||||
.prettyprint-dark .dec,
|
||||
.prettyprint-dark .var { color: #de935f; }
|
||||
/* html attr values */
|
||||
.prettyprint-dark .str,
|
||||
.prettyprint-dark .atv { color: #b5bd68; }
|
|
@ -0,0 +1,108 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>csv-serde : hive csv support</title>
|
||||
<meta name="description" content="">
|
||||
<meta name="author" content="">
|
||||
|
||||
<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
|
||||
<!--[if lt IE 9]>
|
||||
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<link rel="stylesheet" href="http://twitter.github.com/bootstrap/1.4.0/bootstrap.min.css">
|
||||
<style type="text/css">
|
||||
body {
|
||||
padding-top: 60px;
|
||||
}
|
||||
</style>
|
||||
<script type="text/javascript" src="http://google-code-prettify.googlecode.com/svn/trunk/src/prettify.js"></script>
|
||||
<link rel="stylesheet" href="css/prettify.css">
|
||||
</head>
|
||||
|
||||
<body onload="prettyPrint();">
|
||||
|
||||
<div class="topbar">
|
||||
<div class="fill">
|
||||
<div class="container">
|
||||
<a class="brand" href="#">csv-serde</a>
|
||||
<ul class="nav">
|
||||
<li><a href="https://github.com/ogrodnek/csv-serde">Source</a></li>
|
||||
<li><a href="https://github.com/ogrodnek/csv-serde/downloads">Downloads</a></li>
|
||||
<li><a href="https://github.com/ogrodnek/csv-serde/issues">Report Issue</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="container">
|
||||
|
||||
<!-- Main hero unit for a primary marketing message or call to action -->
|
||||
<div class="hero-unit">
|
||||
<h1>csv-serde</h1>
|
||||
<h2>Hive CSV Support</h2>
|
||||
<p>csv-serde adds real CSV support to <a href="http://hive.apache.org/">hive</a> using <a href="http://opencsv.sourceforge.net/">opencsv</a>.</p>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="span5">
|
||||
<h2>Download</h2>
|
||||
<p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="https://github.com/downloads/ogrodnek/csv-serde/csv-serde-1.1.2.jar">csv-serde-1.1.2.jar</a>
|
||||
</li>
|
||||
<li><a href="https://github.com/ogrodnek/csv-serde/zipball/master">csv-serde-master-src.zip</a></li>
</ul>
|
||||
|
||||
</p>
|
||||
</div>
|
||||
<div class="span5">
|
||||
<h2>License</h2>
|
||||
<p>
|
||||
csv-serde is open source and licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0.html">Apache 2 License</a>.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="span10">
|
||||
<h2>Using it</h2>
|
||||
<pre class="prettyprint">
|
||||
add jar path/to/csv-serde.jar;
|
||||
|
||||
create table my_table(a string, b string, ...)
|
||||
row format serde 'com.bizo.hive.serde.csv.CSVSerde'
|
||||
stored as textfile
|
||||
;
|
||||
</pre>
|
||||
|
||||
<p>You can also specify custom separator, quote, or escape
|
||||
characters.</p>
|
||||
<pre class="prettyprint">
|
||||
add jar path/to/csv-serde.jar;
|
||||
|
||||
create table my_table(a string, b string, ...)
|
||||
row format serde 'com.bizo.hive.serde.csv.CSVSerde'
|
||||
with serdeproperties (
|
||||
"separatorChar" = "\t",
|
||||
"quoteChar" = "'",
|
||||
"escapeChar" = "\\"
|
||||
)
|
||||
stored as textfile
|
||||
;
|
||||
</pre>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<footer>
|
||||
<p></p>
|
||||
</footer>
|
||||
|
||||
</div> <!-- /container -->
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,88 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset='utf-8'>
|
||||
|
||||
<title>ogrodnek/csv-serde @ GitHub</title>
|
||||
|
||||
<style type="text/css">
|
||||
body {
|
||||
margin-top: 1.0em;
|
||||
background-color: #71dc27;
|
||||
font-family: Helvetica, Arial, FreeSans, sans-serif;
|
||||
color: #ffffff;
|
||||
}
|
||||
#container {
|
||||
margin: 0 auto;
|
||||
width: 700px;
|
||||
}
|
||||
h1 { font-size: 3.8em; color: #8e23d8; margin-bottom: 3px; }
|
||||
h1 .small { font-size: 0.4em; }
|
||||
h1 a { text-decoration: none }
|
||||
h2 { font-size: 1.5em; color: #8e23d8; }
|
||||
h3 { text-align: center; color: #8e23d8; }
|
||||
a { color: #8e23d8; }
|
||||
.description { font-size: 1.2em; margin-bottom: 30px; margin-top: 30px; font-style: italic;}
|
||||
.download { float: right; }
|
||||
pre { background: #000; color: #fff; padding: 15px;}
|
||||
hr { border: 0; width: 80%; border-bottom: 1px solid #aaa}
|
||||
.footer { text-align:center; padding-top:30px; font-style: italic; }
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<a href="https://github.com/ogrodnek/csv-serde"><img style="position: absolute; top: 0; right: 0; border: 0;" src="http://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub" /></a>
|
||||
|
||||
<div id="container">
|
||||
|
||||
<div class="download">
|
||||
<a href="https://github.com/ogrodnek/csv-serde/zipball/master">
|
||||
<img border="0" width="90" src="https://github.com/images/modules/download/zip.png"></a>
|
||||
<a href="https://github.com/ogrodnek/csv-serde/tarball/master">
|
||||
<img border="0" width="90" src="https://github.com/images/modules/download/tar.png"></a>
|
||||
</div>
|
||||
|
||||
<h1><a href="https://github.com/ogrodnek/csv-serde">csv-serde</a>
|
||||
<span class="small">by <a href="https://github.com/ogrodnek">ogrodnek</a></span></h1>
|
||||
|
||||
<div class="description">
|
||||
Hive SerDe for CSV
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<h2>Authors</h2>
|
||||
<p>Larry Ogrodnek (ogrodnek@gmail.com)
<br/> </p>
|
||||
|
||||
|
||||
|
||||
<h2>Contact</h2>
|
||||
<p>Larry Ogrodnek (ogrodnek@gmail.com)
<br/> </p>
|
||||
|
||||
|
||||
<h2>Download</h2>
|
||||
<p>
|
||||
You can download this project in either
|
||||
<a href="https://github.com/ogrodnek/csv-serde/zipball/master">zip</a> or
|
||||
<a href="https://github.com/ogrodnek/csv-serde/tarball/master">tar</a> formats.
|
||||
</p>
|
||||
<p>You can also clone the project with <a href="http://git-scm.com">Git</a>
|
||||
by running:
|
||||
<pre>$ git clone git://github.com/ogrodnek/csv-serde</pre>
|
||||
</p>
|
||||
|
||||
<div class="footer">
|
||||
get the source code on GitHub : <a href="https://github.com/ogrodnek/csv-serde">ogrodnek/csv-serde</a>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
96
pom.xml
96
pom.xml
|
@ -1,96 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>com.bizo</groupId>
|
||||
<artifactId>csv-serde</artifactId>
|
||||
<version>1.1.2-0.11.0</version>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf.opencsv</groupId>
|
||||
<artifactId>opencsv</artifactId>
|
||||
<version>2.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hive</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
<version>0.11.0</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-core</artifactId>
|
||||
<version>1.0.3</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.jdo</groupId>
|
||||
<artifactId>jdo2-api</artifactId>
|
||||
<version>2.3-20090302111651</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.11</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<properties>
|
||||
<!-- use UTF-8 for everything -->
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||
</properties>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.1</version>
|
||||
<configuration>
|
||||
<source>1.6</source>
|
||||
<target>1.6</target>
|
||||
<encoding>UTF-8</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.2.1</version>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/jar-with-dependencies.xml</descriptor>
|
||||
</descriptors>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>assemble-all</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-site-plugin</artifactId>
|
||||
<version>3.0</version>
|
||||
<configuration>
|
||||
<reportPlugins>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>cobertura-maven-plugin</artifactId>
|
||||
<version>2.5.1</version>
|
||||
<configuration>
|
||||
<formats>
|
||||
<format>html</format>
|
||||
<format>xml</format>
|
||||
</formats>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</reportPlugins>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
65
readme.md
65
readme.md
|
@ -1,65 +0,0 @@
|
|||
# Hive CSV Support
|
||||
|
||||
[![Build Status](https://drone.io/github.com/ogrodnek/csv-serde/status.png)](https://drone.io/github.com/ogrodnek/csv-serde/latest)
|
||||
|
||||
This SerDe adds *real* CSV input and output support to hive using the excellent [opencsv](http://opencsv.sourceforge.net/) library.
|
||||
|
||||
## Using
|
||||
|
||||
|
||||
### Basic Use
|
||||
|
||||
```
|
||||
add jar path/to/csv-serde.jar;
|
||||
|
||||
create table my_table(a string, b string, ...)
|
||||
row format serde 'com.bizo.hive.serde.csv.CSVSerde'
|
||||
stored as textfile
|
||||
;
|
||||
```
|
||||
|
||||
### Custom formatting
|
||||
|
||||
The default separator, quote, and escape characters from the `opencsv` library are:
|
||||
|
||||
```
|
||||
DEFAULT_ESCAPE_CHARACTER \
|
||||
DEFAULT_QUOTE_CHARACTER "
|
||||
DEFAULT_SEPARATOR ,
|
||||
```
|
||||
|
||||
You can also specify custom separator, quote, or escape characters.
|
||||
|
||||
```
|
||||
add jar path/to/csv-serde.jar;
|
||||
|
||||
create table my_table(a string, b string, ...)
|
||||
row format serde 'com.bizo.hive.serde.csv.CSVSerde'
|
||||
with serdeproperties (
|
||||
"separatorChar" = "\t",
|
||||
"quoteChar" = "'",
|
||||
"escapeChar" = "\\"
|
||||
)
|
||||
stored as textfile
|
||||
;
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
The following include opencsv along with the serde, so only the single jar is needed. Currently built against Hive 0.11.0, but should be compatible with other hive versions.
|
||||
|
||||
* [csv-serde-1.1.2-0.11.0-all.jar](https://drone.io/github.com/ogrodnek/csv-serde/files/target/csv-serde-1.1.2-0.11.0-all.jar)
|
||||
|
||||
|
||||
## Building
|
||||
|
||||
Run `mvn package` to build. Both a basic artifact as well as a "fat jar" (with opencsv) are produced.
|
||||
|
||||
### Eclipse support
|
||||
|
||||
Run `mvn eclipse:eclipse` to generate `.project` and `.classpath` files for eclipse.
|
||||
|
||||
|
||||
## License
|
||||
|
||||
csv-serde is open source and licensed under the [Apache 2 License](http://www.apache.org/licenses/LICENSE-2.0.html).
|
|
@ -1,17 +0,0 @@
|
|||
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id>all</id>
|
||||
<formats>
|
||||
<format>jar</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>true</useProjectArtifact>
|
||||
<unpack>true</unpack>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -1,185 +0,0 @@
|
|||
package com.bizo.hive.serde.csv;
|
||||
|
||||
import au.com.bytecode.opencsv.CSVReader;
|
||||
import au.com.bytecode.opencsv.CSVWriter;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hive.serde.Constants;
|
||||
import org.apache.hadoop.hive.serde2.SerDe;
|
||||
import org.apache.hadoop.hive.serde2.SerDeException;
|
||||
import org.apache.hadoop.hive.serde2.SerDeStats;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
|
||||
/**
|
||||
* CSVSerde uses opencsv (http://opencsv.sourceforge.net/) to serialize/deserialize columns as CSV.
|
||||
*
|
||||
* @author Larry Ogrodnek <ogrodnek@gmail.com>
|
||||
*/
|
||||
public final class CSVSerde implements SerDe {
|
||||
|
||||
private ObjectInspector inspector;
|
||||
private String[] outputFields;
|
||||
private int numCols;
|
||||
private List<String> row;
|
||||
|
||||
private char separatorChar;
|
||||
private char quoteChar;
|
||||
private char escapeChar;
|
||||
private String lineEnd;
|
||||
private String nullDefinedAs;
|
||||
|
||||
@Override
|
||||
public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
|
||||
final List<String> columnNames = Arrays.asList(tbl.getProperty(Constants.LIST_COLUMNS).split(","));
|
||||
final List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(Constants.LIST_COLUMN_TYPES));
|
||||
|
||||
numCols = columnNames.size();
|
||||
|
||||
final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);
|
||||
|
||||
for (int i=0; i< numCols; i++) {
|
||||
columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
|
||||
}
|
||||
|
||||
this.inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
|
||||
this.outputFields = new String[numCols];
|
||||
row = new ArrayList<String>(numCols);
|
||||
|
||||
for (int i=0; i< numCols; i++) {
|
||||
row.add(null);
|
||||
}
|
||||
|
||||
separatorChar = getProperty(tbl, "separatorChar", CSVWriter.DEFAULT_SEPARATOR);
|
||||
quoteChar = getProperty(tbl, "quoteChar", CSVWriter.DEFAULT_QUOTE_CHARACTER);
|
||||
escapeChar = getProperty(tbl, "escapeChar", CSVWriter.DEFAULT_ESCAPE_CHARACTER);
|
||||
lineEnd = tbl.getProperty("lineEnd", CSVWriter.DEFAULT_LINE_END);
|
||||
nullDefinedAs = tbl.getProperty("nullDefinedAs", "\\N");
|
||||
}
|
||||
|
||||
private final char getProperty(final Properties tbl, final String property, final char def) {
|
||||
final String val = tbl.getProperty(property);
|
||||
|
||||
if (val != null) {
|
||||
return val.charAt(0);
|
||||
}
|
||||
|
||||
return def;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
|
||||
final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
|
||||
final List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs();
|
||||
|
||||
if (outputFieldRefs.size() != numCols) {
|
||||
throw new SerDeException("Cannot serialize the object because there are "
|
||||
+ outputFieldRefs.size() + " fields but the table has " + numCols + " columns.");
|
||||
}
|
||||
|
||||
// Get all data out.
|
||||
for (int c = 0; c < numCols; c++) {
|
||||
final Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c));
|
||||
final ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector();
|
||||
|
||||
// The data must be of type String
|
||||
final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
|
||||
|
||||
// Convert the field to Java class String, because objects of String type
|
||||
// can be stored in String, Text, or some other classes.
|
||||
outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
|
||||
if (outputFields[c] == null) {
|
||||
outputFields[c] = nullDefinedAs;
|
||||
}
|
||||
}
|
||||
|
||||
final StringWriter writer = new StringWriter();
|
||||
final CSVWriter csv = newWriter(writer, separatorChar, quoteChar, escapeChar, lineEnd);
|
||||
|
||||
try {
|
||||
csv.writeNext(outputFields);
|
||||
csv.close();
|
||||
|
||||
return new Text(writer.toString());
|
||||
} catch (final IOException ioe) {
|
||||
throw new SerDeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object deserialize(final Writable blob) throws SerDeException {
|
||||
Text rowText = (Text) blob;
|
||||
|
||||
CSVReader csv = null;
|
||||
try {
|
||||
csv = newReader(new CharArrayReader(rowText.toString().toCharArray()), separatorChar, quoteChar, escapeChar);
|
||||
final String[] read = csv.readNext();
|
||||
|
||||
for (int i=0; i< numCols; i++) {
|
||||
if (read != null && i < read.length) {
|
||||
row.set(i, read[i] == nullDefinedAs ? null : read[i]);
|
||||
} else {
|
||||
row.set(i, null);
|
||||
}
|
||||
}
|
||||
|
||||
return row;
|
||||
} catch (final Exception e) {
|
||||
throw new SerDeException(e);
|
||||
} finally {
|
||||
if (csv != null) {
|
||||
try {
|
||||
csv.close();
|
||||
} catch (final Exception e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static CSVReader newReader(final Reader reader, char separator, char quote, char escape) {
|
||||
// CSVReader will throw an exception if any of separator, quote, or escape is the same, but
|
||||
// the CSV format specifies that the escape character and quote char are the same... very weird
|
||||
if (CSVWriter.DEFAULT_ESCAPE_CHARACTER == escape) {
|
||||
return new CSVReader(reader, separator, quote);
|
||||
} else {
|
||||
return new CSVReader(reader, separator, quote, escape);
|
||||
}
|
||||
}
|
||||
|
||||
private static CSVWriter newWriter(final Writer writer, char separator, char quote, char escape, String lineEnd) {
|
||||
if (CSVWriter.DEFAULT_ESCAPE_CHARACTER == escape) {
|
||||
return new CSVWriter(writer, separator, quote, lineEnd);
|
||||
} else {
|
||||
return new CSVWriter(writer, separator, quote, escape, lineEnd);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ObjectInspector getObjectInspector() throws SerDeException {
|
||||
return inspector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Class<? extends Writable> getSerializedClass() {
|
||||
return Text.class;
|
||||
}
|
||||
|
||||
public SerDeStats getSerDeStats() {
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -1,65 +0,0 @@
|
|||
package com.bizo.hive.serde.csv;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.hadoop.hive.serde.Constants;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
|
||||
public final class CSVSerdeTest {
|
||||
private final CSVSerde csv = new CSVSerde();
|
||||
final Properties props = new Properties();
|
||||
|
||||
@Before
|
||||
public void setup() throws Exception {
|
||||
props.put(Constants.LIST_COLUMNS, "a,b,c");
|
||||
props.put(Constants.LIST_COLUMN_TYPES, "string,string,string");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeserialize() throws Exception {
|
||||
csv.initialize(null, props);
|
||||
final Text in = new Text("hello,\"yes, okay\",1");
|
||||
|
||||
final List<String> row = (List<String>) csv.deserialize(in);
|
||||
|
||||
assertEquals("hello", row.get(0));
|
||||
assertEquals("yes, okay", row.get(1));
|
||||
assertEquals("1", row.get(2));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDeserializeCustomSeparators() throws Exception {
|
||||
props.put("separatorChar", "\t");
|
||||
props.put("quoteChar", "'");
|
||||
|
||||
csv.initialize(null, props);
|
||||
|
||||
final Text in = new Text("hello\t'yes\tokay'\t1");
|
||||
final List<String> row = (List<String>) csv.deserialize(in);
|
||||
|
||||
assertEquals("hello", row.get(0));
|
||||
assertEquals("yes\tokay", row.get(1));
|
||||
assertEquals("1", row.get(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeserializeCustomEscape() throws Exception {
|
||||
props.put("quoteChar", "'");
|
||||
props.put("escapeChar", "\\");
|
||||
|
||||
csv.initialize(null, props);
|
||||
|
||||
final Text in = new Text("hello,'yes\\'okay',1");
|
||||
final List<String> row = (List<String>) csv.deserialize(in);
|
||||
|
||||
assertEquals("hello", row.get(0));
|
||||
assertEquals("yes'okay", row.get(1));
|
||||
assertEquals("1", row.get(2));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue