avro-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Steven Willis <swil...@compete.com>
Subject RE: How to handle schema dependencies
Date Tue, 11 Dec 2012 21:42:26 GMT
Thanks Dave,

I was thinking about doing something like that (adding the schemas as resources in the jars).
It just seems like a lot of work for something that could be automatic. It would be nice if
during schema parsing we could specify a classpath to be used for dynamic lookup of external
schemas. That way Schema.parse could look up schemas that have already been compiled
like this:

package com.compete.util;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericContainer;

/**
 * Proof of concept: obtain an Avro {@link Schema} from an already-compiled
 * class found on the classpath, instead of from a schema file.
 *
 * <p>The class is located by its fully qualified name (namespace + "." + name),
 * instantiated through its no-argument constructor, and asked for its schema
 * via {@link GenericContainer#getSchema()}.
 */
public class Schemas {
    /**
     * Looks up the hard-coded {@code com.compete.avro.Date} class, prints its
     * schema to standard output, and exits with status 1 (after printing the
     * problem to standard error) if the class cannot be found, is not a
     * {@link GenericContainer}, or cannot be instantiated.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String name = "Date";
        String space = "com.compete.avro";

        try {
            // If we were inside Schema.parse we would probably do:
            // Class.forName(new Name(name, space).toString())

            Class<?> cls = Class.forName(space + "." + name);

            // asSubclass verifies the type before anything is constructed, so a
            // class that is not an Avro container is reported like every other
            // lookup failure instead of escaping as an unhandled cast error.
            Class<? extends GenericContainer> containerClass =
                    cls.asSubclass(GenericContainer.class);

            // Class.newInstance() is deprecated because it rethrows whatever the
            // constructor throws, checked or not; going through the Constructor
            // wraps such failures in InvocationTargetException.
            GenericContainer record = containerClass.getDeclaredConstructor().newInstance();
            Schema schema = record.getSchema();

            // if we were inside Schema.parse(JsonNode schema, Names names) we
            // could now just call: names.put(new Name(name, space), schema);

            System.out.println(schema.toString());
        } catch (ClassNotFoundException e) {
            fail(e);
        } catch (ClassCastException e) {
            fail(e);
        } catch (NoSuchMethodException e) {
            fail(e);
        } catch (InstantiationException e) {
            fail(e);
        } catch (IllegalAccessException e) {
            fail(e);
        } catch (java.lang.reflect.InvocationTargetException e) {
            // The wrapper itself carries no message; the constructor's own
            // exception is the useful part.
            fail(e.getCause() != null ? e.getCause() : e);
        }
    }

    /** Prints the problem to standard error and terminates the JVM with status 1. */
    private static void fail(Throwable problem) {
        System.err.println(problem);
        System.exit(1);
    }
}

-Steven Willis

> -----Original Message-----
> From: Dave McAlpin [mailto:dmcalpin@intelius.com]
> Sent: Tuesday, December 11, 2012 3:32 PM
> To: user@avro.apache.org
> Subject: RE: How to handle schema dependencies
> 
> I had the same problem. My solution was to package external schema
> files into a source jar and have Maven download and extract those
> source jars at code generation time. After generation, I delete the
> external schema along with their generated code and depend on an
> external jar file at runtime.
> 
> I use IDL instead of Avro schema, so this approach might not work for
> you, but here's what I did.
> 
> In the external project (the one I want to import), I changed the
> pom.xml to package schemas into a source jar. Note that the fragment
> below assumes that the Avro schema is stored in src/main/schema, but
> there's nothing special about that location. Also note that I exclude
> generated Java files from the source jar.
> 
>    <build>
>       <resources>
>          <resource>
>             <directory>${project.basedir}/src/main/schema</directory>
>          </resource>
>       </resources>
>       <plugins>
> 	   .
>          .
>          .
>          <plugin>
>             <groupId>org.apache.maven.plugins</groupId>
>             <artifactId>maven-source-plugin</artifactId>
>             <version>2.2.1</version>
>             <executions>
>                <execution>
>                   <id>attach-avdl</id>
>                   <phase>verify</phase>
>                   <goals>
>                      <goal>jar-no-fork</goal>
>                   </goals>
>                   <configuration>
>                      <includePom>true</includePom>
>                      <excludes>
>                         <exclude>**/*.java</exclude>
>                      </excludes>
>                      <includes>
>                         <include>*.avdl</include>
>                      </includes>
>                   </configuration>
>                </execution>
>             </executions>
>          </plugin>
>       </plugins>
>    </build>
> 
> 
> In the project that uses the external schemas, I changed the pom.xml to
> pull in those schemas as external dependencies and delete them after
> code generation is complete. I also delete the generated java files
> that result from those external schema because I want to use the
> generated class files from an external jar rather than the locally
> generated versions.
> 
> ***PLEASE NOTE*** that the code below deletes files as part of clean
> up. To use this, you'll need to update the PATH/TO placeholders. If you
> try this on a real project, MAKE SURE it's backed up before you start
> testing this.
> 
>    <build>
>       <plugins>
>          .
>          .
>          .
>          <plugin>
>             <artifactId>maven-clean-plugin</artifactId>
>             <version>2.5</version>
>             <executions>
>                <execution>
>                   <id>clean-generated-java</id>
>                   <phase>clean</phase>
>                   <goals>
>                      <goal>clean</goal>
>                   </goals>
>                   <configuration>
>                      <filesets>
>                         <fileset>
> 
> <directory>src/main/java/PATH/TO/YOUR/GENERATED/DIRECTORY</directory>
>                            <includes>
>                               <include>*.java</include>
>                            </includes>
>                         </fileset>
>                      </filesets>
>                   </configuration>
>                </execution>
>                <execution>
>                   <id>postgen-clean</id>
>                   <phase>compile</phase>
>                   <goals>
>                      <goal>clean</goal>
>                   </goals>
>                   <configuration>
> 
> <excludeDefaultDirectories>true</excludeDefaultDirectories>
>                      <filesets>
>                         <fileset>
>                            <directory>src/main/schema</directory>
>                            <includes>
>                               <include>external/*</include>
>                               <include>external</include>
>                            </includes>
>                         </fileset>
>                         <fileset>
>                            <directory>src/main/java</directory>
>                            <includes>
>                               <include>**/*</include>
>                            </includes>
>                            <excludes>
> 
> <exclude>PATH/TO/YOUR/GENERATED/SOURCE/DIRECTORY/RELATIVE/TO/SRC/MAIN/JAVA/*</exclude>
>                            </excludes>
>                         </fileset>
>                      </filesets>
>                   </configuration>
>                </execution>
>             </executions>
>          </plugin>
>          <plugin>
>             <groupId>org.apache.maven.plugins</groupId>
>             <artifactId>maven-dependency-plugin</artifactId>
>             <version>2.5.1</version>
>             <executions>
>                <execution>
>                   <id>import-avdl</id>
>                   <phase>initialize</phase>
>                   <goals>
>                      <goal>unpack-dependencies</goal>
>                   </goals>
>                   <configuration>
>                      <includes>*.avdl</includes>
>                      <!-- Limit group ids like this to avoid pulling
> down source for third party projects
> 
> <includeGroupIds>com.example,net.example</includeGroupIds>
>                      -->
>                      <classifier>sources</classifier>
> 
> <failOnMissingClassifierArtifact>true</failOnMissingClassifierArtifact>
> 
> <outputDirectory>src/main/schema/external</outputDirectory>
>                   </configuration>
>                </execution>
>             </executions>
>          </plugin>
>       </plugins>
>    </build>
> 
> In the IDL file that uses these external schemas, I make my imports
> point to an "external" sub-directory, like this:
> 
>    import idl "external/Profile.avdl";
> 
> Because I'm pulling all dependent schema files into a single common
> directory named "external", schema file names need to be unique across
> all projects. That's not a problem for me, but if it is for you, you
> could come up with a more sophisticated way to unpack and reference
> those external dependencies.
> 
> Finally, in the pom file that uses those external jars, I just add a
> normal maven dependency, which pulls down source jars at generation
> time and normal jars at runtime.
> 
> Dave
> 
> -----Original Message-----
> From: Steven Willis [mailto:swillis@compete.com]
> Sent: Tuesday, December 11, 2012 11:28 AM
> To: user@avro.apache.org
> Subject: How to handle schema dependencies
> 
> Hi,
> 
> My company currently has one big repo that holds all our java code and
> avro schemas. I'm currently splitting it up into one common repo and
> separate repos for each product. This has been easy to do for our java
> code using maven and dependencies, however I can't find a way to do
> this with our avro schemas. In the common repo I've got schemas that
> everything relies on. These common schemas include some very domain
> specific stuff along with some pretty general use schemas that we've
> defined like Date:
> 
>  {"name": "Date",
>   "namespace": "com.compete.avro",
>   "type": "record",
>   "fields": [ { "name":"year", "type":"int" },
>               { "name":"month", "type":"int" },
>               { "name":"day", "type":"int" } ] }
> 
> 
> I'd like to be able to use 'Date' (defined in the common repo) in
> schemas inside the product-xyz repo. But when I try this, I get:
> 
> [ERROR] FATAL ERROR
> [INFO] ----------------------------------------------------------------
> --------
> [INFO] "Date" is not a defined name. The type of the "date" field must
> be a defined name or a {"type": ...} expression.
> [INFO] ----------------------------------------------------------------
> --------
> [INFO] Trace
> org.apache.avro.SchemaParseException: "Date" is not a defined name. The
> type of the "date" field must be a defined name or a {"type": ...}
> expression.
> 	at org.apache.avro.Schema.parse(Schema.java:1094)
> 	at org.apache.avro.Schema.parse(Schema.java:1163)
> 	at org.apache.avro.Schema$Parser.parse(Schema.java:931)
> 	at org.apache.avro.Schema$Parser.parse(Schema.java:908)
> 	at
> org.apache.avro.compiler.specific.SpecificCompiler.compileSchema(Specif
> icCompiler.java:182)
> 	at
> org.apache.avro.compiler.specific.SpecificCompiler.compileSchema(Specif
> icCompiler.java:174)
> 	at org.apache.avro.mojo.SchemaMojo.doCompile(SchemaMojo.java:53)
> 	at
> org.apache.avro.mojo.AbstractAvroMojo.compileFiles(AbstractAvroMojo.jav
> a:129)
> 	at
> org.apache.avro.mojo.AbstractAvroMojo.execute(AbstractAvroMojo.java:99)
> 	at
> org.apache.maven.plugin.DefaultPluginManager.executeMojo(DefaultPluginM
> anager.java:490)
> 	at
> org.apache.maven.lifecycle.DefaultLifecycleExecutor.executeGoals(Defaul
> tLifecycleExecutor.java:694)
> 	at
> org.apache.maven.lifecycle.DefaultLifecycleExecutor.executeGoalWithLife
> cycle(DefaultLifecycleExecutor.java:556)
> 	at
> org.apache.maven.lifecycle.DefaultLifecycleExecutor.executeGoal(Default
> LifecycleExecutor.java:535)
> 	at
> org.apache.maven.lifecycle.DefaultLifecycleExecutor.executeGoalAndHandl
> eFailures(DefaultLifecycleExecutor.java:387)
> 	at
> org.apache.maven.lifecycle.DefaultLifecycleExecutor.executeTaskSegments
> (DefaultLifecycleExecutor.java:348)
> 	at
> org.apache.maven.lifecycle.DefaultLifecycleExecutor.execute(DefaultLife
> cycleExecutor.java:180)
> 	at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:328)
> 	at org.apache.maven.DefaultMaven.execute(DefaultMaven.java:138)
> 	at org.apache.maven.cli.MavenCli.main(MavenCli.java:362)
> 	at
> org.apache.maven.cli.compat.CompatibleMain.main(CompatibleMain.java:60)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.ja
> va:57)
> 	at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccesso
> rImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:616)
> 	at
> org.codehaus.classworlds.Launcher.launchEnhanced(Launcher.java:315)
> 	at org.codehaus.classworlds.Launcher.launch(Launcher.java:255)
> 	at
> org.codehaus.classworlds.Launcher.mainWithExitCode(Launcher.java:430)
> 	at org.codehaus.classworlds.Launcher.main(Launcher.java:375)
> 
> The common jar which contains the compiled Date class is available in
> our maven repo... is there some way to use that? I'm currently using
> the avro-maven-plugin to do the java code generation, is there an
> option to this plugin to specify schemas or jars to include? It seems
> like the only work around is to put all avro schemas that we might use
> in any product in the common repo, or duplicate all the common schemas
> inside each product specific repo.
> 
> -Steven Willis

Mime
View raw message